cluster/0000755000175100001440000000000012553357106011760 5ustar hornikuserscluster/po/0000755000175100001440000000000012553131040012361 5ustar hornikuserscluster/po/update-me.sh0000755000175100001440000000177512553131017014617 0ustar hornikusers#!/bin/sh # #__>> Keep in sync with ~/R/Pkgs/Matrix/po/update-me.sh <<__ # ## Script for updating package-specific *.pot files ## written such that it should work for any package # R=${R:-R} thisdir=`dirname $0` ; cd $thisdir; thisdir=`pwd` echo "R = '$R' (`$R --version | head -1`) preliminary thisdir='$thisdir'" pkgDIR=`dirname $thisdir` pkg=`basename $pkgDIR` echo ' --> pkgDIR='$pkgDIR' ; pkg='$pkg # echo ''; echo '## FIXME ## use new Scheme from R 3.0.x on' # cd `$R RHOME`/po # make pkg-update PKG=$pkg PKGDIR=$pkgDIR L=update.log Rcd="require('tools'); update_pkg_po('$pkgDIR')" ## -------------------------------- as of R 3.0.0 echo $Rcd > $L echo $Rcd | $R --slave 2>&1 | tee -a $L echo 'end{make pkg-update}' ; echo '' echo 'Test with (e.g.)' echo ' LANGUAGE=de R --no-environ --no-save' ; echo '' echo 'and then something like' echo ' Matrix(1:6, 2,3) %*% Matrix(1:4, 2)'; echo '' echo 'Commit with something like' echo " svn ci -m'translation updates' po inst/po"; echo '' cluster/po/R-fr.po0000644000175100001440000003326612461440665013557 0ustar hornikusers# Translation of src/library/Recommended/cluster/po/R-cluster.pot to German # Copyright (C) 2013 The R Foundation # This file is distributed under the same license as the R package. 
# Philippe.Grosjean@umons.ac.be, 2014-- msgid "" msgstr "" "Project-Id-Version: cluster 1.14.5\n" "POT-Creation-Date: 2015-01-26 14:31\n" "PO-Revision-Date: 2014-03-30 09:03+0100\n" "Last-Translator: Philippe Grosjean \n" "Language-Team: none\n" "Language: fr\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "X-Generator: Poedit 1.6.4\n" "Plural-Forms: nplurals=2; plural=(n > 1);\n" msgid "invalid clustering method" msgstr "méthode d'agrégation incorrecte" msgid "ambiguous clustering method" msgstr "méthode d'agrégation ambigüe" msgid "'par.method' must be of length 1, 3, or 4" msgstr "'par.method' doit être de longueur 1, 3 ou 4" msgid "NA-values in the dissimilarity matrix not allowed." msgstr "" "les valeurs manquantes (NA) ne sont pas autorisées dans la matrice de " "dissimilarité." msgid "'x' is not and cannot be converted to class \"dissimilarity\"" msgstr "" "'x' n'est pas et ne peux pas être converti en un objet de classe " "\"dissimilarity\"" msgid "x is not a numeric dataframe or matrix." msgstr "" "x n'est pas un tableau de données (data frame) ou une matrice numérique." msgid "need at least 2 objects to cluster" msgstr "au moins deux objets sont nécessaires pour effectuer une agrégation" msgid "No clustering performed, NA-values in the dissimilarity matrix." msgstr "" "Aucune agrégation n'est réalisée, présence de NAs dans la matrice de " "dissimilarité." msgid "'x' is a \"dist\" object, but should be a data matrix or frame" msgstr "" "'x' est un objet \"dist\", mais il faut une matrice ou un tableau de données" msgid "The number of cluster should be at least 1 and at most n-1." msgstr "Le nombre de groupes doit être compris entre 1 et n-1." 
msgid "'sampsize' should be at least %d = max(2, 1+ number of clusters)" msgstr "'sampsize' doit être au minimum %d = max(2, 1+ nombre de groupes)" msgid "'sampsize' = %d should not be larger than the number of objects, %d" msgstr "'sampsize' = %d ne peut être plus grand que le nombre d'objets, %d" msgid "'samples' should be at least 1" msgstr "'samples' doit valoir au moins 1" msgid "when 'medoids.x' is FALSE, 'keep.data' must be too" msgstr "lorsque 'medoids.x' est FALSE, 'keep.data' doit l'être aussi" msgid "" "Each of the random samples contains objects between which no distance can be " "computed." msgstr "" "Chacun des échantillons aléatoires contient des objets entre lesquels la " "distance ne peut être calculée." msgid "" "For each of the %d samples, at least one object was found which could not be " "assigned to a cluster (because of missing values)." msgstr "" "Dans chacun des %d échantillons, au moins un objet ne peut être assigné à un " "groupe (parce qu'il contient des valeurs manquantes)" msgid "invalid 'jstop' from .C(cl_clara,.):" msgstr "'jstop' incorrect obtenu de .C(cl_clara,.) :" msgid "'B' has to be a positive integer" msgstr "'B' doit être un entier positif" msgid "invalid 'twins' object" msgstr "objet 'twins' incorrect" msgid "x is not a dataframe or a numeric matrix." msgstr "" "x n'est pas un tableau de données (data frame) ou une matrice numérique." msgid "invalid %s; must be named list" msgstr "%s incorrect ; doit être une liste nommée" msgid "%s has invalid column names" msgstr "%s a des noms de colonnes incorrects" msgid "%s must be in 1:ncol(x)" msgstr "%s doit être compris dans 1:ncol(x)" msgid "%s must contain column names or numbers" msgstr "%s doit contenir des noms de colonnes ou des nombres" msgid "at least one binary variable has more than 2 levels." msgstr "au moins une des variables binaires a plus de deux niveaux." msgid "at least one binary variable has not 2 different levels." 
msgstr "au moins une variable binaire n'a pas deux nivea\tux." msgid "at least one binary variable has values not in {0,1,NA}" msgstr "au moins une variable binaire a des valeurs autres que {0,1,NA}" msgid "binary variable(s) %s treated as interval scaled" msgstr "" "la ou les variables binaires %s sont traitées comme intervalles standardisés" msgid "%s has constant columns %s; these are standardized to 0" msgstr "%s à des colonnes constantes %s ; elles sont standardisées à 0" msgid "with mixed variables, metric \"gower\" is used automatically" msgstr "" "avec des variables mélangées, la métrique \"gower\" est utilisée " "automatiquement" msgid "'weights' must be of length p (or 1)" msgstr "'weights' doit être de longueur p (ou 1)" msgid "invalid type %s for column numbers %s" msgstr "type inadéquat %s pour les numéros de colonnes %s" msgid "NA values in the dissimilarity matrix not allowed." msgstr "" "les valeurs manquantes (NA) ne sont pas admises dans la matrice de " "dissimilarité." msgid "No clustering performed, NA's in dissimilarity matrix." msgstr "" "Aucune agrégation n'est réalisée, NAs dans la matrice de dissimilarité." msgid "'x' must be numeric n x p matrix" msgstr "'x' doit être une matrice numérique n x p" msgid "omitting NAs" msgstr "valeurs NAs ignorées" msgid "no points without missing values" msgstr "aucun point sans valeurs manquantes" msgid "computed some negative or all 0 probabilities" msgstr "des probabilités négatives ou égales à zéro ont été calculées" msgid "algorithm possibly not converged in %d iterations" msgstr "l'algorithme n'a vraisemblablement pas convergé en %d itérations" msgid "'A' must be p x p cov-matrix defining an ellipsoid" msgstr "'A doit être une matrice de covariance p x p définissant un ellipsoïde" msgid "ellipsoidPoints() not yet implemented for p >= 3 dim." msgstr "ellipsoidPoints() non implémenté pour p >= 3 dim." 
msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}" msgstr "'k' (nombre de groupes) doit être {1,2,…, n/2 -1}" msgid "'memb.exp' must be a finite number > 1" msgstr "'memb.exp' doit être un nombre fini > 1" msgid "'maxit' must be non-negative integer" msgstr "'maxit' doit être un entier non négatif" msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1" msgstr "'iniMem.p' doit être une matrice n * k non négative avec rowSums == 1" msgid "FANNY algorithm has not converged in 'maxit' = %d iterations" msgstr "l'algorithme FANNY n'a pas convergé en 'maxit' = %d itérations" msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?" msgstr "" "les appartenances sont toutes très proches de 1/k. Essayez en diminuant " "'memb.exp' ?" msgid "'m', a membership matrix, must be nonnegative with rowSums == 1" msgstr "" "'m', une matrice d'appartenance, doit être non négative avec rowSums == 1" msgid "'n' must be >= 2" msgstr "'n\" doit être >= 2" msgid "x must be a matrix or data frame." msgstr "x doit être une matrice ou un tableau de données (data frame)." msgid "All variables must be binary (e.g., factor with 2 levels)." msgstr "" "Toutes les variables doivent être binaires (c'est-à-dire, des variables " "facteur à 2 niveaux)." msgid "No clustering performed, an object was found with all values missing." msgstr "" "Aucune agrégation n'a été effectuée, un objet a toutes ses valeurs " "manquantes." msgid "" "No clustering performed, found variable with more than half values missing." msgstr "" "Aucune agrégation n'a été effectuée, une variable a plus de la moitié de ses " "valeurs manquantes." msgid "" "No clustering performed, a variable was found with all non missing values " "identical." msgstr "" "Aucune agrégation n'a été effectuée, une variable a toutes ses valeurs non " "manquantes." msgid "No clustering performed, all variables have at least one missing value." 
msgstr "" "Aucune agrégation n'a été effectuée, toutes les variables ont au moins une " "valeur manquante." msgid "Cannot keep data when 'x' is a dissimilarity!" msgstr "" msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2" msgstr "Le nombre de groupes 'k' doit être dans {1,2, …, n-1} ; où n >= 2" msgid "" "'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d" msgstr "" "'medoids' doit être NULL ou un vecteur de %d valeurs d'indice distinctes " "dans {1, 2, …, n}, n=%d" msgid "No clustering performed, NAs in the computed dissimilarity matrix." msgstr "" "Aucune agrégation n'a été effectuée, NAs dans la matrice de dissimilarité " "calculée." msgid "error from .C(cl_pam, *): invalid medID's" msgstr "erreur depuis .C(cl_pam, *) : medIDs incorrects" msgid "NA-values are not allowed in dist-like 'x'." msgstr "" "des valeurs manquantes NA ne sont pas autorisées dans 'x' de type dist." msgid "Distances must be result of dist or a square matrix." msgstr "" "Les distances doivent résulter d'un objet dist ou d'une matrice carrée." msgid "the square matrix is not symmetric." msgstr "la matrice carrée n'est pas symétrique." msgid ">>>>> funny case in clusplot.default() -- please report!" msgstr "" ">>>>> cas pathologique dans clusplot.default() -- veuillez envoyer un " "rapport de bug !" 
msgid "x is not a data matrix" msgstr "x n'est pas une matrice de données" msgid "one or more objects contain only missing values" msgstr "un ou plusieurs objets ne contiennent que des valeurs manquantes" msgid "one or more variables contain only missing values" msgstr "une ou plusieurs variables ne contiennent que des valeurs manquantes" msgid "" "Missing values were displaced by the median of the corresponding variable(s)" msgstr "" "Les valeurs manquantes ont été remplacées par la médiane de la ou des " "variables correspondantes" msgid "x is not numeric" msgstr "x n'est pas numérique" msgid "The clustering vector is of incorrect length" msgstr "Le vecteur d'agrégation est de longueur incorrecte" msgid "NA-values are not allowed in clustering vector" msgstr "" "Les valeurs manquantes NA ne sont pas autorisées dans le vecteur d'agrégation" msgid "" "Error in Fortran routine for the spanning ellipsoid,\n" " rank problem??" msgstr "" "Erreur dans la routine Fortran pour obtenir l'ellipsoïde de dispersion,\n" " problème de rang??" 
msgid "'col.clus' should have length 4 when color is TRUE" msgstr "'col.clus' doit avoir une longueur de 4 lorsque color est TRUE" msgid "no diss nor data found, nor the original argument of %s" msgstr "pas de diss ou de données trouvées, même pas l'argument original de %s" msgid "no diss nor data found for clusplot()'" msgstr "pas de diss ou de données trouvées pour clusplot()'" msgid "invalid partition object" msgstr "objet de partitionnement incorrect" msgid "" "full silhouette is only available for results of 'clara(*, keep.data = TRUE)'" msgstr "" "la silhouette complète n'est disponible que pour les résultats de 'clara(*, " "keep.data = TRUE)'" msgid "'x' must only have integer codes" msgstr "'x' doit n'avoir que des codes entiers" msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'" msgstr "" "Il faut soit un objet 'dist' de dissimilarité ou une matrice de " "dissimilarité 'dmatrix'" msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'" msgstr "'dmatrix' n'est pas une matrice de dissimilarité compatible avec 'x'" msgid "clustering 'x' and dissimilarity 'dist' are incompatible" msgstr "" "l'agrégation 'x' et la matrice de dissimilarité 'dist' sont incompatibles" msgid "invalid silhouette structure" msgstr "structure de silhouette incorrecte" msgid "invalid 'silhouette' object" msgstr "objet 'silhouette' incorrect" msgid "No valid silhouette information (#{clusters} =? 1)" msgstr "Aucune valeur de silhouette n'est correcte (#{groupes} =? 1)" msgid "Observation %s has *only* NAs --> omit it for clustering" msgid_plural "Observations %s have *only* NAs --> omit them for clustering!" msgstr[0] "L'observation %s n'a *que* des NAs --> ignorée pour le regroupement" msgstr[1] "" "Les observations %s n'ont *que* des NAs --> ignorées pour le regroupement!" msgid "%d observation (%s) has *only* NAs --> omit them for clustering!" msgid_plural "" "%d observations (%s ...) have *only* NAs --> omit them for clustering!" 
msgstr[0] "" "%d observation (%s) n'a *que* des NAs --> ignorée pour le regroupement!" msgstr[1] "" "%d observations (%s) n'ont *que* des NAs --> ignorées pour le regroupement!" msgid "setting 'logical' variable %s to type 'asymm'" msgid_plural "setting 'logical' variables %s to type 'asymm'" msgstr[0] "la variable 'logical' %s est transformée en type 'asymm'" msgstr[1] "les variable 'logical' %s sont transformées en type 'asymm'" #~ msgid "NAdiss" #~ msgstr "NAdiss" #~ msgid "non.diss" #~ msgstr "non.diss" #~ msgid "no distance can be computed." #~ msgstr "aucune distance n'a été calculée." #~ msgid "For each of the" #~ msgstr "Pour chacun des" #~ msgid "" #~ "samples, at least one object was found which\n" #~ " could not" #~ msgstr "" #~ "échantillons, au moins un objet a été trouvé qui\n" #~ " ne peut" #~ msgid "be assigned to a cluster (because of missing values)." #~ msgstr "être assigné à un groupe (à cause de valeurs manquantes)." #~ msgid "invalid" #~ msgstr "incorrect" #~ msgid "type" #~ msgstr "type" #~ msgid "type$" #~ msgstr "type$" #~ msgid "binary variable(s)" #~ msgstr "variable(s) binaire(s)" #~ msgid "x" #~ msgstr "x" #~ msgid "has constant columns" #~ msgstr "a des colonnes constantes" #~ msgid "invalid type" #~ msgstr "type incorrect" #~ msgid "possibly not converged in" #~ msgstr "probablement pas de convergence en" #~ msgid "iterations" #~ msgstr "itérations" #~ msgid "'medoids' must be NULL or vector of" #~ msgstr "'medoids' doit être NULL ou un vecteur de" #~ msgid "rank problem??" #~ msgstr "problème de rang ??" #~ msgid "'clara(*, keep.data = TRUE)'" #~ msgstr "'clara(*, keep.data = TRUE)'" cluster/po/de.po0000644000175100001440000000326312466225010013321 0ustar hornikusers# # Translation of src/library/Recommended/cluster/po/cluster.pot to German # # Copyright (C) 2013 The R Foundation # # This file is distributed under the same license as the R package. # # Detlef Steuer , 2013-2015. 
msgid "" msgstr "" "Project-Id-Version: cluster 2.0.1\n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2015-02-02 14:29+0100\n" "PO-Revision-Date: 2015-02-02 12:30+0100\n" "Last-Translator: Detlef Steuer \n" "Language-Team: R Core \n" "Language: DE\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=n == 1 ? 0 : 1;\n" #: clara.c:96 #, c-format msgid "C level clara(): random k=%d > n **\n" msgstr "C Level clara(): random k=%d > n **\n" #: clara.c:307 #, c-format msgid "" "clara()'s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave 'toomany_NA'" msgstr "" "clara()'s C Level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) ergab 'toomany_NA'" #: clara.c:341 clara.c:346 #, c-format msgid "C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%d" msgstr "C Level dysta2(): nsel[%s= %d] = %d ist außerhalb von 0..n, n=%d" #: pam.c:154 msgid "Invalid 'medoids'" msgstr "unzulässige 'medoids'" #: pam.c:646 #, c-format msgid "pam(): Bug in C level cstat(), k=%d: ntt=0" msgstr "pam(): Bug in C Level cstat(), k=%d: ntt=0" #: twins.c:153 #, c-format msgid "" "agnes(method=%d, par.method=*) lead to invalid merge; step %d, D(.,.)=%g" msgstr "" "agnes(method=%d, par.method=*) führte zu unzulässigem Zusammenfassen;\n" "Schritt %d, D(.,.)=%g" #: twins.c:260 #, c-format msgid "invalid method (code %d)" msgstr "unzulässige Methode (Kode %d)" cluster/po/R-de.po0000644000175100001440000003326112547427120013527 0ustar hornikusers# Translation of src/library/Recommended/cluster/po/R-cluster.pot to German # Copyright (C) 2013-2015 The R Foundation # This file is distributed under the same license as the R package. # Detlef Steuer , 2013. msgid "" msgstr "" "Project-Id-Version: R 3.2.1\n" "Report-Msgid-Bugs-To: bugs.r-project.org\n" "POT-Creation-Date: 2015-01-26 14:31\n" "PO-Revision-Date: 2014-03-28 10:16+0100\n" "Last-Translator: Detlef Steuer \n" "Language-Team: R Core Team = 3 dim." 
msgstr "ellipsoidPoints() noch nicht für Dimensionen p>=3 implementiert" msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}" msgstr "'k' (Anzahl Cluster) muss aus {1, 2, ..., n/2 -1} sein" msgid "'memb.exp' must be a finite number > 1" msgstr "'memb.exp' muss endliche Zahl > 1 sein" msgid "'maxit' must be non-negative integer" msgstr "'maxit' muss nicht-negative Zahl sein" msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1" msgstr "" "'iniMem.p' muss eine nicht-negative n x k Matrix mit Zeilensummen == 1 sein" msgid "FANNY algorithm has not converged in 'maxit' = %d iterations" msgstr "FANNY Algorithmus ist in 'maxit' = %d Iterationen nicht konvergiert" msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?" msgstr "" "die Mitgliedswerte sind alle sehr nah an 1/k. Evtl. 'memb.exp' reduzieren?" msgid "'m', a membership matrix, must be nonnegative with rowSums == 1" msgstr "" "'m' ist eine Mitgliedswertmatrix, muss nicht-negativ sein mit Zeilensummen " "== 1" msgid "'n' must be >= 2" msgstr "'n' muss >= 2 sein" msgid "x must be a matrix or data frame." msgstr "x muss eine Matrix oder Dataframe sein" msgid "All variables must be binary (e.g., factor with 2 levels)." msgstr "Alle Variablen müssen binär sein (z.B. Faktor mit 2 Stufen)." msgid "No clustering performed, an object was found with all values missing." msgstr "" "Keine Clusterung durchgeführt. Objekt gefunden, bei dem alle Werte fehlend " "sind." msgid "" "No clustering performed, found variable with more than half values missing." msgstr "" "Keine Clusterung durchgeführt. Variable gefunden, mit mehr als der Hälfte " "fehlenden Werten." msgid "" "No clustering performed, a variable was found with all non missing values " "identical." msgstr "" "Keine Clusterung durchgeführt. Variable gefunden, bei der alle nicht " "fehlenden Werte identisch sind." msgid "No clustering performed, all variables have at least one missing value." 
msgstr "" "Keine Clusterung durchgeführt. Alle Variablen haben mindestens einen " "fehlenden Wert." msgid "Cannot keep data when 'x' is a dissimilarity!" msgstr "Kann Datenmatrix 'data' nicht beibehalten wenn 'x' eine 'dissimilarity' ist!" msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2" msgstr "Anzahl der Cluster 'k' muss auch {1, 2, ..., n-1} sein; deshalb n >= 2" msgid "" "'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d" msgstr "" "'medoids' muss NULL oder ein Vektor von %d verschiedenen Indizes aus " "{1, 2,..., n}, n=%d sein" msgid "No clustering performed, NAs in the computed dissimilarity matrix." msgstr "" "Keine Clusterung durchgeführt, NAs in der berechneten Unähnlichkeitsmatrix." msgid "error from .C(cl_pam, *): invalid medID's" msgstr "Fehler aus .C(cl_pam, *): unzulässige medID's" msgid "NA-values are not allowed in dist-like 'x'." msgstr "NAs nicht erlaubt in dist-ähnlichem 'x'." msgid "Distances must be result of dist or a square matrix." msgstr "" "Distanzen müssen ein Ergebnis von dist oder eine quadratische Matrix sein." msgid "the square matrix is not symmetric." msgstr "Die quadratische Matrix ist nicht symmetrisch." msgid ">>>>> funny case in clusplot.default() -- please report!" msgstr "" ">>>>> komische Sache in clusplot.default() -- bitte an den Entwickler senden!" 
msgid "x is not a data matrix" msgstr "x ist keine Datenmatrix" msgid "one or more objects contain only missing values" msgstr "eines oder mehrere Objekte enthalten nur fehlende Werte" msgid "one or more variables contain only missing values" msgstr "eine oder mehrere Variablen enthalten nur fehlende Werte" msgid "" "Missing values were displaced by the median of the corresponding variable(s)" msgstr "" "Fehlende Werte wurden durch den Median der korrespondierenden Variable(n) " "ersetzt" msgid "x is not numeric" msgstr "x ist nicht numerisch" msgid "The clustering vector is of incorrect length" msgstr "Der Clustervektor hat eine falsche Länge" msgid "NA-values are not allowed in clustering vector" msgstr "NAs im Clustervektor nicht erlaubt" msgid "" "Error in Fortran routine for the spanning ellipsoid,\n" " rank problem??" msgstr "Fehler im Fortran-Kode für den aufspannenden Ellipsoiden, Rangproblem?" msgid "'col.clus' should have length 4 when color is TRUE" msgstr "'col.clus' sollte Länge 4 haben, wenn color auf TRUE gesetzt ist" msgid "no diss nor data found, nor the original argument of %s" msgstr "" "weder diss noch data gefunden, ebensowenig das ursprüngliche Argument " "von %s" msgid "no diss nor data found for clusplot()'" msgstr "weder diss noch data für 'clusplot()' gefunden" msgid "invalid partition object" msgstr "unzulässiges Partitionsobjekt" msgid "" "full silhouette is only available for results of 'clara(*, keep.data = TRUE)'" msgstr "" "die volle Silhoutte ist nur verfügbar für Resultate von 'clara(*, keep." 
"data=TRUE)'" msgid "'x' must only have integer codes" msgstr "'x' darf nur ganzzahlige Kodes enthalten" msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'" msgstr "" "Benötige entweder Unähnlichkeitsmatrix 'dist' oder diss.matrix 'dmatrix'" msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'" msgstr "'dmatrix' ist keine zu 'x' kompatible Unähnlichkeitsmatrix " msgid "clustering 'x' and dissimilarity 'dist' are incompatible" msgstr "Clusterung 'x' und Unähnlichkeitsmatrix 'dist' sind inkompatibel" msgid "invalid silhouette structure" msgstr "unzulässige Silhouttenstruktur" msgid "invalid 'silhouette' object" msgstr "unzulässiges 'silhouette' Objekt" msgid "No valid silhouette information (#{clusters} =? 1)" msgstr "keine gültige Silhouetteninformation (#{clusters} =? 1)" msgid "Observation %s has *only* NAs --> omit it for clustering" msgid_plural "Observations %s have *only* NAs --> omit them for clustering!" msgstr[0] "Beobachtung %s hat *nur* NAs --> ausgelassen für Clustering" msgstr[1] "Beobachtungen %s haben *nur* NAs --> ausgelassen für Clustering" msgid "%d observation (%s) has *only* NAs --> omit them for clustering!" msgid_plural "" "%d observations (%s ...) have *only* NAs --> omit them for clustering!" msgstr[0] "%d Beobachtung (%s) hat *nur* NAs --> ausgelassen für Clustering" msgstr[1] "" "%d Beobachtungen (%s) haben *nur* NAs --> ausgelassen für Clustering" msgid "setting 'logical' variable %s to type 'asymm'" msgid_plural "setting 'logical' variables %s to type 'asymm'" msgstr[0] "setze 'logical' Variable %s auf Typ 'asymm'" msgstr[1] "setze 'logical' Variablen %s auf Typ 'asymm'" #~ msgid "NAdiss" #~ msgstr "NAdiss" #~ msgid "non.diss" #~ msgstr "non.diss" #~ msgid "no distance can be computed." 
#~ msgstr "keine Entfernung berechnent werden kann" #~ msgid "For each of the" #~ msgstr "Für jede der" #~ msgid "" #~ "samples, at least one object was found which\n" #~ " could not" #~ msgstr "Stichproben wurde mindestens ein Objekt gefunden, das nicht" #~ msgid "be assigned to a cluster (because of missing values)." #~ msgstr "einem Cluster zugeordnet werden konnte (wegen fehlender Werte)" #~ msgid "invalid" #~ msgstr "unzulässiger" #~ msgid "type" #~ msgstr "Typ" #~ msgid "type$" #~ msgstr "type$" #~ msgid "binary variable(s)" #~ msgstr "binäre Variable(n)" #~ msgid "x" #~ msgstr "x" #~ msgid "has constant columns" #~ msgstr "hat konstante Spalten" #~ msgid "invalid type" #~ msgstr "unzulässiger Typ" #~ msgid "possibly not converged in" #~ msgstr "evtl nicht konvergiert in " #~ msgid "iterations" #~ msgstr "Iterationen" #~ msgid "'medoids' must be NULL or vector of" #~ msgstr "'medoids' muss NULL sein oder ein Vektor von" #~ msgid "rank problem??" #~ msgstr "evtl. Probleme mit dem Rang?" #~ msgid "'clara(*, keep.data = TRUE)'" #~ msgstr "'clara(*, keep.data = TRUE)'" #~ msgid "" #~ "No clustering performed, a variable was found with at least 50% missing " #~ "values." #~ msgstr "" #~ "Keine Clusterung durchgeführt. Variable mit mehr als 50% fehlender Werte." #~ msgid "No clustering performed," #~ msgstr "Clustering nicht durchgeführt," #~ msgid "Observations %s" #~ msgstr "Beobachtungen %s" #~ msgid "%d observations (%s ...)" #~ msgstr "%d Beobachtungen (%s ...)" #~ msgid "have *only* NAs --> na.omit() them for clustering!" #~ msgstr "haben *nur* NAs --> na.omit() diese für das Clustern" #~ msgid "s" #~ msgstr "n" #~ msgid "to type 'asymm'" #~ msgstr "auf Typ 'asymm'" cluster/po/ko.po0000644000175100001440000000330212466225010013334 0ustar hornikusers# Korean translations for cluster package. # Recommended/cluster/po/ko.po # Maintainer: Martin Maechler # # This file is distributed under the same license as the R cluster package. # Chel Hee Lee , 2013-2015. 
# Reviewing process is completed (15-JAN-2015) # QC: PASS # Freezing on 06-FEB-2015 for R-3.1.3 # msgid "" msgstr "" "Project-Id-Version: cluster 1.15.2\n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2015-02-02 14:29+0100\n" "PO-Revision-Date: 2015-02-06 21:56-0600\n" "Last-Translator:Chel Hee Lee \n" "Language-Team: Chel Hee Lee \n" "Language: ko\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=1; plural=0;\n" #: clara.c:96 #, c-format msgid "C level clara(): random k=%d > n **\n" msgstr "C level clara(): random k=%d > n **\n" #: clara.c:307 #, c-format msgid "" "clara()'s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave 'toomany_NA'" msgstr "" "clara()'s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave 'toomany_NA'" #: clara.c:341 clara.c:346 #, c-format msgid "C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%d" msgstr "C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%d" #: pam.c:154 msgid "Invalid 'medoids'" msgstr "" #: pam.c:646 #, c-format msgid "pam(): Bug in C level cstat(), k=%d: ntt=0" msgstr "pam(): Bug in C level cstat(), k=%d: ntt=0" #: twins.c:153 #, c-format msgid "" "agnes(method=%d, par.method=*) lead to invalid merge; step %d, D(.,.)=%g" msgstr "" #: twins.c:260 #, c-format msgid "invalid method (code %d)" msgstr "메소드가 올바르지 않습니다 (code %d)." cluster/po/R-en@quot.po0000644000175100001440000002637412014772531014557 0ustar hornikusers# All this catalog "translates" are quotation characters. # The msgids must be ASCII and therefore cannot contain real quotation # characters, only substitutes like grave accent (0x60), apostrophe (0x27) # and double quote (0x22). These substitutes look strange; see # http://www.cl.cam.ac.uk/~mgk25/ucs/quotes.html # # This catalog translates grave accent (0x60) and apostrophe (0x27) to # left single quotation mark (U+2018) and right single quotation mark (U+2019). 
# It also translates pairs of apostrophe (0x27) to # left single quotation mark (U+2018) and right single quotation mark (U+2019) # and pairs of quotation mark (0x22) to # left double quotation mark (U+201C) and right double quotation mark (U+201D). # # When output to an UTF-8 terminal, the quotation characters appear perfectly. # When output to an ISO-8859-1 terminal, the single quotation marks are # transliterated to apostrophes (by iconv in glibc 2.2 or newer) or to # grave/acute accent (by libiconv), and the double quotation marks are # transliterated to 0x22. # When output to an ASCII terminal, the single quotation marks are # transliterated to apostrophes, and the double quotation marks are # transliterated to 0x22. # msgid "" msgstr "" "Project-Id-Version: R 2.15.1\n" "Report-Msgid-Bugs-To: bugs.r-project.org\n" "POT-Creation-Date: 2012-08-21 22:49\n" "PO-Revision-Date: 2012-08-21 22:49\n" "Last-Translator: Automatically generated\n" "Language-Team: none\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Language: en\n" "Plural-Forms: nplurals=2; plural=(n != 1);\n" msgid "invalid clustering method" msgstr "invalid clustering method" msgid "ambiguous clustering method" msgstr "ambiguous clustering method" msgid "'par.method' must be of length 1, 3, or 4" msgstr "‘par.method’ must be of length 1, 3, or 4" msgid "NAdiss" msgstr "NAdiss" msgid "non.diss" msgstr "non.diss" msgid "x is not a numeric dataframe or matrix." msgstr "x is not a numeric dataframe or matrix." msgid "need at least 2 objects to cluster" msgstr "need at least 2 objects to cluster" msgid "No clustering performed, NA-values in the dissimilarity matrix." msgstr "No clustering performed, NA-values in the dissimilarity matrix." msgid "'x' is a \"dist\" object, but should be a data matrix or frame" msgstr "‘x’ is a \"dist\" object, but should be a data matrix or frame" msgid "The number of cluster should be at least 1 and at most n-1." 
msgstr "The number of cluster should be at least 1 and at most n-1." msgid "'sampsize' should be at least %d = max(2, 1+ number of clusters)" msgstr "‘sampsize’ should be at least %d = max(2, 1+ number of clusters)" msgid "'sampsize' = %d should not be larger than the number of objects, %d" msgstr "‘sampsize’ = %d should not be larger than the number of objects, %d" msgid "'samples' should be at least 1" msgstr "‘samples’ should be at least 1" msgid "when 'medoids.x' is FALSE, 'keep.data' must be too" msgstr "when ‘medoids.x’ is FALSE, ‘keep.data’ must be too" msgid "Each of the random samples contains objects between which" msgstr "Each of the random samples contains objects between which" msgid "no distance can be computed." msgstr "no distance can be computed." msgid "For each of the" msgstr "For each of the" msgid "" "samples, at least one object was found which\n" " could not" msgstr "" "samples, at least one object was found which\n" " could not" msgid "be assigned to a cluster (because of missing values)." msgstr "be assigned to a cluster (because of missing values)." msgid "invalid 'jstop' from .C(cl_clara,.):" msgstr "invalid ‘jstop’ from .C(cl_clara,.):" msgid "'B' has to be a positive integer" msgstr "‘B’ has to be a positive integer" msgid "invalid 'twins' object" msgstr "invalid ‘twins’ object" msgid "x is not a dataframe or a numeric matrix." msgstr "x is not a dataframe or a numeric matrix." msgid "invalid" msgstr "invalid" msgid "type" msgstr "type" msgid "; must be named list" msgstr "; must be named list" msgid "type$" msgstr "type$" msgid "has invalid column names" msgstr "has invalid column names" msgid "must be in 1:ncol(x)" msgstr "must be in 1:ncol(x)" msgid "must contain column names or numbers" msgstr "must contain column names or numbers" msgid "at least one binary variable has more than 2 levels." msgstr "at least one binary variable has more than 2 levels." msgid "at least one binary variable has not 2 different levels." 
msgstr "at least one binary variable has not 2 different levels." msgid "at least one binary variable has values not in {0,1,NA}" msgstr "at least one binary variable has values not in {0,1,NA}" msgid "binary variable(s)" msgstr "binary variable(s)" msgid "treated as interval scaled" msgstr "treated as interval scaled" msgid "x" msgstr "x" msgid "has constant columns" msgstr "has constant columns" msgid "; these are standardized to 0" msgstr "; these are standardized to 0" msgid "with mixed variables, metric \"gower\" is used automatically" msgstr "with mixed variables, metric \"gower\" is used automatically" msgid "'weights' must be of length p (or 1)" msgstr "‘weights’ must be of length p (or 1)" msgid "invalid type" msgstr "invalid type" msgid "for column numbers" msgstr "for column numbers" msgid "No clustering performed, NA's in dissimilarity matrix." msgstr "No clustering performed, NA's in dissimilarity matrix." msgid "'x' must be numeric n x p matrix" msgstr "‘x’ must be numeric n x p matrix" msgid "omitting NAs" msgstr "omitting NAs" msgid "no points without missing values" msgstr "no points without missing values" msgid "computed some negative or all 0 'prob'abilities" msgstr "computed some negative or all 0 'prob'abilities" msgid "possibly not converged in" msgstr "possibly not converged in" msgid "iterations" msgstr "iterations" msgid "'A' must be p x p cov-matrix defining an ellipsoid" msgstr "‘A’ must be p x p cov-matrix defining an ellipsoid" msgid "ellipsoidPoints() not yet implemented for p >= 3 dim." msgstr "ellipsoidPoints() not yet implemented for p >= 3 dim." 
msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}" msgstr "‘k’ (number of clusters) must be in {1,2, .., n/2 -1}" msgid "'memb.exp' must be a finite number > 1" msgstr "‘memb.exp’ must be a finite number > 1" msgid "'maxit' must be non-negative integer" msgstr "‘maxit’ must be non-negative integer" msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1" msgstr "‘iniMem.p’ must be a nonnegative n * k matrix with rowSums == 1" msgid "FANNY algorithm has not converged in 'maxit' = %d iterations" msgstr "FANNY algorithm has not converged in ‘maxit’ = %d iterations" msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?" msgstr "the memberships are all very close to 1/k. Maybe decrease ‘memb.exp’ ?" msgid "'m', a membership matrix, must be nonnegative with rowSums == 1" msgstr "'m', a membership matrix, must be nonnegative with rowSums == 1" msgid "'n' must be >= 2" msgstr "‘n’ must be >= 2" msgid "x must be a matrix or data frame." msgstr "x must be a matrix or data frame." msgid "All variables must be binary (factor with 2 levels)." msgstr "All variables must be binary (factor with 2 levels)." msgid "No clustering performed," msgstr "No clustering performed," msgid "an object was found with all values missing." msgstr "an object was found with all values missing." msgid "a variable was found with at least 50% missing values." msgstr "a variable was found with at least 50% missing values." msgid "a variable was found with all non missing values identical." msgstr "a variable was found with all non missing values identical." msgid "all variables have at least one missing value." msgstr "all variables have at least one missing value." 
msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2" msgstr "Number of clusters ‘k’ must be in {1,2, .., n-1}; hence n >= 2" msgid "'medoids' must be NULL or vector of" msgstr "‘medoids’ must be NULL or vector of" msgid "distinct indices in {1,2, .., n}, n=" msgstr "distinct indices in {1,2, .., n}, n=" msgid "No clustering performed, NAs in the computed dissimilarity matrix." msgstr "No clustering performed, NAs in the computed dissimilarity matrix." msgid "error from .C(cl_pam, *): invalid medID's" msgstr "error from .C(cl_pam, *): invalid medID's" msgid "NA-values are not allowed in dist-like 'x'." msgstr "NA-values are not allowed in dist-like ‘x’." msgid "Distances must be result of dist or a square matrix." msgstr "Distances must be result of dist or a square matrix." msgid "the square matrix is not symmetric." msgstr "the square matrix is not symmetric." msgid ">>>>> funny case in clusplot.default() -- please report!" msgstr ">>>>> funny case in clusplot.default() -- please report!" msgid "x is not a data matrix" msgstr "x is not a data matrix" msgid "one or more objects contain only missing values" msgstr "one or more objects contain only missing values" msgid "one or more variables contain only missing values" msgstr "one or more variables contain only missing values" msgid "" "Missing values were displaced by the median of the corresponding variable(s)" msgstr "" "Missing values were displaced by the median of the corresponding variable(s)" msgid "x is not numeric" msgstr "x is not numeric" msgid "The clustering vector is of incorrect length" msgstr "The clustering vector is of incorrect length" msgid "NA-values are not allowed in clustering vector" msgstr "NA-values are not allowed in clustering vector" msgid "Error in Fortran routine for the spanning ellipsoid," msgstr "Error in Fortran routine for the spanning ellipsoid," msgid "rank problem??" msgstr "rank problem??" 
msgid "'col.clus' should have length 4 when color is TRUE" msgstr "‘col.clus’ should have length 4 when color is TRUE" msgid "no diss nor data found, nor the original argument of" msgstr "no diss nor data found, nor the original argument of" msgid "no diss nor data found for clusplot()'" msgstr "no diss nor data found for clusplot()'" msgid "invalid partition object" msgstr "invalid partition object" msgid "full silhouette is only available for results of" msgstr "full silhouette is only available for results of" msgid "'clara(*, keep.data = TRUE)'" msgstr "'clara(*, keep.data = TRUE)'" msgid "'x' must only have integer codes" msgstr "‘x’ must only have integer codes" msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'" msgstr "Need either a dissimilarity ‘dist’ or diss.matrix ‘dmatrix’" msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'" msgstr "‘dmatrix’ is not a dissimilarity matrix compatible to ‘x’" msgid "clustering 'x' and dissimilarity 'dist' are incompatible" msgstr "clustering ‘x’ and dissimilarity ‘dist’ are incompatible" msgid "invalid silhouette structure" msgstr "invalid silhouette structure" msgid "invalid 'silhouette' object" msgstr "invalid ‘silhouette’ object" msgid "No valid silhouette information (#{clusters} =? 1)" msgstr "No valid silhouette information (#{clusters} =? 1)" msgid "setting 'logical' variable %s to type 'asymm'" msgid_plural "setting 'logical' variables %s to type 'asymm'" msgstr[0] "setting ‘logical’ variable %s to type ‘asymm’" msgstr[1] "setting ‘logical’ variables %s to type ‘asymm’" cluster/po/cluster.pot0000644000175100001440000000232012553131017014567 0ustar hornikusers# SOME DESCRIPTIVE TITLE. # Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER # This file is distributed under the same license as the PACKAGE package. # FIRST AUTHOR , YEAR. 
# #, fuzzy msgid "" msgstr "" "Project-Id-Version: cluster 2.0.3\n" "Report-Msgid-Bugs-To: \n" "POT-Creation-Date: 2015-07-20 09:32+0200\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" "Language: \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=CHARSET\n" "Content-Transfer-Encoding: 8bit\n" #: clara.c:96 #, c-format msgid "C level clara(): random k=%d > n **\n" msgstr "" #: clara.c:307 #, c-format msgid "" "clara()'s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave 'toomany_NA'" msgstr "" #: clara.c:341 clara.c:346 #, c-format msgid "C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%d" msgstr "" #: pam.c:154 msgid "Invalid 'medoids'" msgstr "" #: pam.c:646 #, c-format msgid "pam(): Bug in C level cstat(), k=%d: ntt=0" msgstr "" #: twins.c:153 #, c-format msgid "" "agnes(method=%d, par.method=*) lead to invalid merge; step %d, D(.,.)=%g" msgstr "" #: twins.c:260 #, c-format msgid "invalid method (code %d)" msgstr "" cluster/po/R-cluster.pot0000644000175100001440000001477312553131017015005 0ustar hornikusersmsgid "" msgstr "" "Project-Id-Version: cluster 2.0.3\n" "POT-Creation-Date: 2015-07-20 09:32\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=CHARSET\n" "Content-Transfer-Encoding: 8bit\n" msgid "invalid clustering method" msgstr "" msgid "ambiguous clustering method" msgstr "" msgid "'par.method' must be of length 1, 3, or 4" msgstr "" msgid "NA-values in the dissimilarity matrix not allowed." msgstr "" msgid "'x' is not and cannot be converted to class \"dissimilarity\"" msgstr "" msgid "x is not a numeric dataframe or matrix." msgstr "" msgid "need at least 2 objects to cluster" msgstr "" msgid "No clustering performed, NA-values in the dissimilarity matrix." 
msgstr "" msgid "'x' is a \"dist\" object, but should be a data matrix or frame" msgstr "" msgid "The number of cluster should be at least 1 and at most n-1." msgstr "" msgid "'sampsize' should be at least %d = max(2, 1+ number of clusters)" msgstr "" msgid "'sampsize' = %d should not be larger than the number of objects, %d" msgstr "" msgid "'samples' should be at least 1" msgstr "" msgid "when 'medoids.x' is FALSE, 'keep.data' must be too" msgstr "" msgid "Each of the random samples contains objects between which no distance can be computed." msgstr "" msgid "For each of the %d samples, at least one object was found which could not be assigned to a cluster (because of missing values)." msgstr "" msgid "invalid 'jstop' from .C(cl_clara,.):" msgstr "" msgid "'B' has to be a positive integer" msgstr "" msgid "invalid 'twins' object" msgstr "" msgid "x is not a dataframe or a numeric matrix." msgstr "" msgid "invalid %s; must be named list" msgstr "" msgid "%s has invalid column names" msgstr "" msgid "%s must be in 1:ncol(x)" msgstr "" msgid "%s must contain column names or numbers" msgstr "" msgid "at least one binary variable has more than 2 levels." msgstr "" msgid "at least one binary variable has not 2 different levels." msgstr "" msgid "at least one binary variable has values not in {0,1,NA}" msgstr "" msgid "binary variable(s) %s treated as interval scaled" msgstr "" msgid "%s has constant columns %s; these are standardized to 0" msgstr "" msgid "with mixed variables, metric \"gower\" is used automatically" msgstr "" msgid "'weights' must be of length p (or 1)" msgstr "" msgid "invalid type %s for column numbers %s" msgstr "" msgid "NA values in the dissimilarity matrix not allowed." msgstr "" msgid "No clustering performed, NA's in dissimilarity matrix." 
msgstr "" msgid "'x' must be numeric n x p matrix" msgstr "" msgid "omitting NAs" msgstr "" msgid "no points without missing values" msgstr "" msgid "computed some negative or all 0 probabilities" msgstr "" msgid "algorithm possibly not converged in %d iterations" msgstr "" msgid "'A' must be p x p cov-matrix defining an ellipsoid" msgstr "" msgid "ellipsoidPoints() not yet implemented for p >= 3 dim." msgstr "" msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}" msgstr "" msgid "'memb.exp' must be a finite number > 1" msgstr "" msgid "'maxit' must be non-negative integer" msgstr "" msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1" msgstr "" msgid "FANNY algorithm has not converged in 'maxit' = %d iterations" msgstr "" msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?" msgstr "" msgid "'m', a membership matrix, must be nonnegative with rowSums == 1" msgstr "" msgid "'n' must be >= 2" msgstr "" msgid "x must be a matrix or data frame." msgstr "" msgid "All variables must be binary (e.g., factor with 2 levels)." msgstr "" msgid "No clustering performed, an object was found with all values missing." msgstr "" msgid "No clustering performed, found variable with more than half values missing." msgstr "" msgid "No clustering performed, a variable was found with all non missing values identical." msgstr "" msgid "No clustering performed, all variables have at least one missing value." msgstr "" msgid "Cannot keep data when 'x' is a dissimilarity!" msgstr "" msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2" msgstr "" msgid "'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d" msgstr "" msgid "No clustering performed, NAs in the computed dissimilarity matrix." msgstr "" msgid "error from .C(cl_pam, *): invalid medID's" msgstr "" msgid "NA-values are not allowed in dist-like 'x'." msgstr "" msgid "Distances must be result of dist or a square matrix." 
msgstr "" msgid "the square matrix is not symmetric." msgstr "" msgid ">>>>> funny case in clusplot.default() -- please report!" msgstr "" msgid "x is not a data matrix" msgstr "" msgid "one or more objects contain only missing values" msgstr "" msgid "one or more variables contain only missing values" msgstr "" msgid "Missing values were displaced by the median of the corresponding variable(s)" msgstr "" msgid "x is not numeric" msgstr "" msgid "The clustering vector is of incorrect length" msgstr "" msgid "NA-values are not allowed in clustering vector" msgstr "" msgid "Error in Fortran routine for the spanning ellipsoid,\n rank problem??" msgstr "" msgid "'col.clus' should have length 4 when color is TRUE" msgstr "" msgid "no diss nor data found, nor the original argument of %s" msgstr "" msgid "no diss nor data found for clusplot()'" msgstr "" msgid "invalid partition object" msgstr "" msgid "full silhouette is only available for results of 'clara(*, keep.data = TRUE)'" msgstr "" msgid "'x' must only have integer codes" msgstr "" msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'" msgstr "" msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'" msgstr "" msgid "clustering 'x' and dissimilarity 'dist' are incompatible" msgstr "" msgid "invalid silhouette structure" msgstr "" msgid "invalid 'silhouette' object" msgstr "" msgid "No valid silhouette information (#{clusters} =? 1)" msgstr "" msgid "Observation %s has *only* NAs --> omit it for clustering" msgid_plural "Observations %s have *only* NAs --> omit them for clustering!" msgstr[0] "" msgstr[1] "" msgid "%d observation (%s) has *only* NAs --> omit them for clustering!" msgid_plural "%d observations (%s ...) have *only* NAs --> omit them for clustering!" 
msgstr[0] "" msgstr[1] "" msgid "setting 'logical' variable %s to type 'asymm'" msgid_plural "setting 'logical' variables %s to type 'asymm'" msgstr[0] "" msgstr[1] "" cluster/po/R-pl.po0000644000175100001440000012405112461440665013554 0ustar hornikusersmsgid "" msgstr "" "Project-Id-Version: cluster 1.15.1\n" "Report-Msgid-Bugs-To: bugs.r-project.org\n" "POT-Creation-Date: 2015-01-26 14:31\n" "PO-Revision-Date: 2014-03-27 17:34+0100\n" "Last-Translator: Łukasz Daniel \n" "Language-Team: Łukasz Daniel \n" "Language: pl_PL\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 " "|| n%100>=20) ? 1 : 2);\n" "X-Poedit-SourceCharset: iso-8859-1\n" "X-Generator: Poedit 1.5.4\n" # cluster/R/agnes.R: 10 # stop("invalid clustering method") msgid "invalid clustering method" msgstr "niepoprawna metoda grupowania" # cluster/R/agnes.R: 11 # stop("ambiguous clustering method") msgid "ambiguous clustering method" msgstr "niejednoznaczna metoda grupowania" # cluster/R/agnes.R: 22 # stop("'par.method' must be of length 1, 3, or 4") msgid "'par.method' must be of length 1, 3, or 4" msgstr "'par.method' musi być długości 1, 3, lub 4" # cluster/R/agnes.R: 28 # stop("NA values in the dissimilarity matrix not allowed.") # cluster/R/diana.R: 11 # stop("NA values in the dissimilarity matrix not allowed.") # cluster/R/pam.R: 13 # stop("NA values in the dissimilarity matrix not allowed.") # cluster/R/fanny.R: 12 # stop("NA values in the dissimilarity matrix not allowed.") msgid "NA-values in the dissimilarity matrix not allowed." msgstr "wartości NA w macierzy różnic nie są dozwolone." 
# cluster/R/agnes.R: 35 # stop(gettextf("%s is not and cannot be converted to class \"dissimilarity\"", dataname)) # cluster/R/diana.R: 18 # stop(gettextf("%s is not and cannot be converted to class \"dissimilarity\"", dataname)) # cluster/R/pam.R: 20 # stop(gettextf("%s is not and cannot be converted to class \"dissimilarity\"", dataname)) # cluster/R/fanny.R: 19 # stop(gettextf("%s is not and cannot be converted to class \"dissimilarity\"", dataname)) msgid "'x' is not and cannot be converted to class \"dissimilarity\"" msgstr "" "argument 'x' nie jest i nie może być przekształcony na obiekt klasy " "\"dissimilarity\"" # cluster/R/agnes.R: 53 # stop(gettextf("%s is not a numeric dataframe or matrix.", dataname)) # cluster/R/clara.R: 15 # stop(gettextf("%s is not a numeric dataframe or matrix.", dataname)) # cluster/R/diana.R: 36 # stop(gettextf("%s is not a numeric dataframe or matrix.", dataname)) # cluster/R/pam.R: 40 # stop(gettextf("%s is not a numeric dataframe or matrix.", dataname)) # cluster/R/fanny.R: 37 # stop(gettextf("%s is not a numeric dataframe or matrix.", dataname)) msgid "x is not a numeric dataframe or matrix." msgstr "argument 'x' nie jest ramką liczbową ani też macierzą" # cluster/R/agnes.R: 68 # stop("need at least 2 objects to cluster") msgid "need at least 2 objects to cluster" msgstr "potrzeba co najmniej 2 obiektów do grupowania" # cluster/R/agnes.R: 92 # stop("No clustering performed, NA values in the dissimilarity matrix.", "\n", sep = "" ) # cluster/R/fanny.R: 120 # stop("No clustering performed, NA values in the dissimilarity matrix.") msgid "No clustering performed, NA-values in the dissimilarity matrix." msgstr "Nie wykonano grupowania, wartości NA w macierzy różnic." 
# cluster/R/clara.R: 13 # stop(gettextf("%s is a \"dist\" object, but should be a data matrix or frame", dataname)) msgid "'x' is a \"dist\" object, but should be a data matrix or frame" msgstr "'x' jest obiektem klasy \"dist\", ale powinien być macierzą lub ramką" # cluster/R/clara.R: 18 # stop("The number of cluster should be at least 1 and at most n-1." ) msgid "The number of cluster should be at least 1 and at most n-1." msgstr "Liczba grup powinna wynosić conajmniej 1 oraz co najwyżej n-1." # cluster/R/clara.R: 20 # stop(gettextf("'sampsize' should be at least %d = max(2, 1+ number of clusters)", max(2,k+1)), domain = "R-cluster") msgid "'sampsize' should be at least %d = max(2, 1+ number of clusters)" msgstr "'sampsize' powinien być co najmniej %d = max(2, 1+ liczba grup)" # cluster/R/clara.R: 22 # stop(gettextf("'sampsize' = %d should not be larger than the number of objects, %d", sampsize, n), domain = "R-cluster") msgid "'sampsize' = %d should not be larger than the number of objects, %d" msgstr "'sampsize' = %d nie powinien być większy niż liczba obiektów, %d" # cluster/R/clara.R: 24 # stop("'samples' should be at least 1") msgid "'samples' should be at least 1" msgstr "'samples' powinno wynosić przynajmniej 1" # cluster/R/clara.R: 32 # stop("when 'medoids.x' is FALSE, 'keep.data' must be too") msgid "when 'medoids.x' is FALSE, 'keep.data' must be too" msgstr "kiedy 'medoids.x' jest FALSE, 'keep.data' musi być również FALSE" # cluster/R/clara.R: 96 # stop("Each of the random samples contains objects between which no distance can be computed.") msgid "" "Each of the random samples contains objects between which no distance can be " "computed." msgstr "" "Każda z losowych próbek zawiera obiekty pomiędzy którymi żadna odległość nie " "może być obliczona." 
# cluster/R/clara.R: 98 # stop(gettextf("For each of the %d samples, at least one object was found which could not be assigned to a cluster (because of missing values).", samples)) msgid "" "For each of the %d samples, at least one object was found which could not be " "assigned to a cluster (because of missing values)." msgstr "" "Dla każdej z %d próbek, co najmniej jeden obiekt został znaleziony, który " "nie mógł być przypisany do grupy (z uwagi na brakujące wartości)." # cluster/R/clara.R: 100 # stop(gettextf("invalid 'jstop' from .C(cl_clara,.): %s", res$jstop)) msgid "invalid 'jstop' from .C(cl_clara,.):" msgstr "niepoprawny 'jstop' z '.C(cl_clara,.)':" # cluster/R/clusGap.R: 20 # stop("'B' has to be a positive integer") msgid "'B' has to be a positive integer" msgstr "'B' musi być dodatnią liczbą całkowitą" # cluster/R/coef.R: 10 # stop("invalid 'twins' object") msgid "invalid 'twins' object" msgstr "niepoprawny obiekt 'twins'" # cluster/R/daisy.R: 8 # stop(gettextf("%s is not a dataframe or a numeric matrix.", dataname)) msgid "x is not a dataframe or a numeric matrix." 
msgstr "argument 'x' nie jest ramką danych ani też macierzą liczbową" # cluster/R/daisy.R: 15 # stop(gettextf("invalid %s; must be named list", sQuote("type"))) msgid "invalid %s; must be named list" msgstr "niepoprawne %s; musi być nazwaną listą" # cluster/R/daisy.R: 21 # stop(gettextf("%s has invalid column names", paste0("type$", nt))) msgid "%s has invalid column names" msgstr "%s posiada niepoprawne nazwy kolumn" # cluster/R/daisy.R: 25 # stop(gettextf("%s must be in 1:ncol(x)", paste0("type$", nt))) msgid "%s must be in 1:ncol(x)" msgstr "%s musi być w przedziale 1:ncol(x)" # cluster/R/daisy.R: 27 # stop(gettextf("%s must contain column names or numbers", paste0("type$", nt))) msgid "%s must contain column names or numbers" msgstr "%s musi zawierać nazwy kolumn lub liczby" # cluster/R/daisy.R: 38 # stop("at least one binary variable has more than 2 levels.") msgid "at least one binary variable has more than 2 levels." msgstr "przynajmniej jedna zmienna binarna posiada więcej niż 2 poziomy." # cluster/R/daisy.R: 40 # warning("at least one binary variable has not 2 different levels.") msgid "at least one binary variable has not 2 different levels." msgstr "przynajmniej jedna zmienna binarna nie posiada 2 różnych poziomów." 
# cluster/R/daisy.R: 48 # stop("at least one binary variable has values not in {0,1,NA}") msgid "at least one binary variable has values not in {0,1,NA}" msgstr "przynajmniej jedna zmienna binarna posiada wartości poza {0, 1, NA}" # cluster/R/daisy.R: 71 # warning(gettextf("binary variable(s) %s treated as interval scaled", pColl(which(tI)[iBin]))) msgid "binary variable(s) %s treated as interval scaled" msgstr "zmienne binarne %s traktowane jako interwał zostały przeskalowane" # cluster/R/daisy.R: 92 # warning(gettextf("%s has constant columns %s; these are standardized to 0", sQuote("x"), pColl(which(sx == 0)))) msgid "%s has constant columns %s; these are standardized to 0" msgstr "%s posiada stałe kolumny %s; zostały one ustandaryzowane do zera" # cluster/R/daisy.R: 102 # warning("with mixed variables, metric \"gower\" is used automatically") msgid "with mixed variables, metric \"gower\" is used automatically" msgstr "z mieszanymi zmiennymi, metryka 'gower' jest używana automatycznie" # cluster/R/daisy.R: 117 # stop("'weights' must be of length p (or 1)") msgid "'weights' must be of length p (or 1)" msgstr "'weights' musi być o długości 'p' (lub 1)" # cluster/R/daisy.R: 125 # stop(gettextf("invalid type %s for column numbers %s", type2[ina], pColl(which(is.na)))) msgid "invalid type %s for column numbers %s" msgstr "niepoprawny typ %s dla liczb kolumn %s" # cluster/R/agnes.R: 28 # stop("NA values in the dissimilarity matrix not allowed.") # cluster/R/diana.R: 11 # stop("NA values in the dissimilarity matrix not allowed.") # cluster/R/pam.R: 13 # stop("NA values in the dissimilarity matrix not allowed.") # cluster/R/fanny.R: 12 # stop("NA values in the dissimilarity matrix not allowed.") msgid "NA values in the dissimilarity matrix not allowed." msgstr "wartości NA w macierzy różnic nie są dozwolone." # cluster/R/diana.R: 76 # stop("No clustering performed, NA's in dissimilarity matrix.\n") msgid "No clustering performed, NA's in dissimilarity matrix." 
msgstr "Nie wykonano grupowania, wartości NA w macierzy różnic" # cluster/R/ellipsoidhull.R: 14 # stop("'x' must be numeric n x p matrix") msgid "'x' must be numeric n x p matrix" msgstr "'x' musi być liczbową macierzą n x p" # cluster/R/ellipsoidhull.R: 16 # warning("omitting NAs") msgid "omitting NAs" msgstr "pomijanie wartości NA" # cluster/R/ellipsoidhull.R: 20 # stop("no points without missing values") msgid "no points without missing values" msgstr "brak punktów bez brakujących wartości" # cluster/R/ellipsoidhull.R: 39 # stop("computed some negative or all 0 probabilities") msgid "computed some negative or all 0 probabilities" msgstr "" "niektóre wyliczone prawdopodobieństwa są ujemne lub wszystkie są zerami" # cluster/R/fanny.R: 107 # warning(gettextf( # "FANNY algorithm has not converged in 'maxit' = %d iterations", # maxit)) msgid "algorithm possibly not converged in %d iterations" msgstr "algorytm prawdopodobnie nie uzbieżnił się w %d iteracjach" # cluster/R/ellipsoidhull.R: 92 # stop("'A' must be p x p cov-matrix defining an ellipsoid") msgid "'A' must be p x p cov-matrix defining an ellipsoid" msgstr "'A' musi być macierzą kowariancji p x p określającą elipsoidę" # cluster/R/ellipsoidhull.R: 106 # stop("ellipsoidPoints() not yet implemented for p >= 3 dim.") msgid "ellipsoidPoints() not yet implemented for p >= 3 dim." msgstr "" "'ellipsoidPoints()' nie została jeszcze zaimplementowana dla p >= 3 wymiary." 
# cluster/R/fanny.R: 55 # stop("'k' (number of clusters) must be in {1,2, .., n/2 -1}") msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}" msgstr "'k' (liczba grup) musi mieścić się w przedziale {1,2, .., n/2 -1}" # cluster/R/fanny.R: 58 # stop("'memb.exp' must be a finite number > 1") msgid "'memb.exp' must be a finite number > 1" msgstr "'memb.exp' musi być skończoną liczbą > 1" # cluster/R/fanny.R: 60 # stop("'maxit' must be non-negative integer") msgid "'maxit' must be non-negative integer" msgstr "'maxit' musi być nieujemną liczbą całkowitą" # cluster/R/fanny.R: 69 # stop("'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1") msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1" msgstr "'iniMem.p' musi być nieujemną maceirzą n x k z rowSums == 1" # cluster/R/fanny.R: 107 # warning(gettextf( # "FANNY algorithm has not converged in 'maxit' = %d iterations", # maxit)) msgid "FANNY algorithm has not converged in 'maxit' = %d iterations" msgstr "algorytm FANNY nie uzbieżnił się w 'maxit' = %d iteracjach" # cluster/R/fanny.R: 144 # warning("the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?") msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?" msgstr "przynależności są bardzo bliskie 1/k. Może zmniejszyć 'memb.exp'?" # cluster/R/fanny.R: 241 # stop("'m', a membership matrix, must be nonnegative with rowSums == 1") msgid "'m', a membership matrix, must be nonnegative with rowSums == 1" msgstr "macierz przynależności 'm' musi być nieujemna z rowSums == 1" # cluster/R/internal.R: 18 # stop("'n' argument must be >= 2") # cluster/R/internal.R: 26 # stop("'n' argument must be >= 2") msgid "'n' must be >= 2" msgstr "argument 'n' musi być >= 2" # cluster/R/mona.R: 6 # stop("'x' must be a matrix or data frame.") msgid "x must be a matrix or data frame." msgstr "argument 'x' musi być macierzą lub ramką danych." 
# cluster/R/mona.R: 10 # stop("All variables must be binary (factor with 2 levels).") msgid "All variables must be binary (e.g., factor with 2 levels)." msgstr "Wszystkie zmienne muszą być binarne (czynnik z dwoma poziomami)" # cluster/R/mona.R: 40 # stop("No clustering performed, an object was found with all values missing.") msgid "No clustering performed, an object was found with all values missing." msgstr "" "Nie wykonano grupowania, znaleziono obiekt któremu brakowało wszystkich " "wartości." # cluster/R/mona.R: 40 # stop("No clustering performed, an object was found with all values missing.") msgid "" "No clustering performed, found variable with more than half values missing." msgstr "" "Nie wykonano grupowania, znaleziono obiekt któremu brakowało wszystkich " "wartości." # cluster/R/mona.R: 44 # stop("No clustering performed, a variable was found with all non missing values identical.") msgid "" "No clustering performed, a variable was found with all non missing values " "identical." msgstr "" "Nie wykonano grupowania, znaleziono zmienną z identycznymi niebrakującymi " "wartościami." # cluster/R/mona.R: 46 # stop("No clustering performed, all variables have at least one missing value.") msgid "No clustering performed, all variables have at least one missing value." msgstr "" "Nie wykonano grupowania, wszystkie zmienne mają co najmniej jedną brakującą " "wartość." msgid "Cannot keep data when 'x' is a dissimilarity!" 
msgstr "" # cluster/R/pam.R: 56 # stop("Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2") msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2" msgstr "" "Liczba grup 'k' musi zawierać się w zbiorze {1,2, .., n-1}; tak więc n >= 2" # cluster/R/pam.R: 64 # stop(gettextf("'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d", k, n)) msgid "" "'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d" msgstr "" "argument 'medoids' musi być wartością NULL lub wektorem %d różnych indeksów " "w {1,2, .., n}, n=%d" # cluster/R/pam.R: 109 # stop("No clustering performed, NAs in the computed dissimilarity matrix.") msgid "No clustering performed, NAs in the computed dissimilarity matrix." msgstr "Nie wykonano grupowania, wyliczono wartości NA w macierzy różnic." # cluster/R/pam.R: 116 # stop("error from .C(cl_pam, *): invalid medID's") msgid "error from .C(cl_pam, *): invalid medID's" msgstr "błąd w '.C(cl_pam, *)': niepoprawne 'medID'" # cluster/R/plotpart.R: 70 # stop("NA values are not allowed in dist-like 'x'.") msgid "NA-values are not allowed in dist-like 'x'." msgstr "wartości NA nie są dozwolone w 'x' typu odległości." # cluster/R/plotpart.R: 79 # stop("Distances must be result of dist or a square matrix.") msgid "Distances must be result of dist or a square matrix." msgstr "Odległości muszą być wynikiem 'dist' lub macierzy kwadratowej." # cluster/R/plotpart.R: 81 # stop("the square matrix is not symmetric.") msgid "the square matrix is not symmetric." msgstr "macierz kwadratowa nie jest symetryczna." # cluster/R/plotpart.R: 94 # warning(">>>>> funny case in clusplot.default() -- please report!\n") msgid ">>>>> funny case in clusplot.default() -- please report!" msgstr "" ">>>>> zabawny przypadek w 'clusplot.default()' -- proszę zgłosić raport!" 
# cluster/R/plotpart.R: 116 # stop("'x' is not a data matrix") msgid "x is not a data matrix" msgstr "argument 'x' nie jest macierzą danych" # cluster/R/plotpart.R: 120 # stop("one or more objects contain only missing values") msgid "one or more objects contain only missing values" msgstr "jeden lub więcej obiektów zawierają jedynie wartości brakujące" # cluster/R/plotpart.R: 122 # stop("one or more variables contain only missing values") msgid "one or more variables contain only missing values" msgstr "jeden lub więcej zmiennych zawiera jedynie wartości brakujące" # cluster/R/plotpart.R: 125 # message("Missing values were displaced by the median of the corresponding variable(s)") msgid "" "Missing values were displaced by the median of the corresponding variable(s)" msgstr "" "Brakujące wartości zostały zastąpione przez medianę odpowiednich zmiennych" # cluster/R/plotpart.R: 164 # stop("'x' is not numeric") msgid "x is not numeric" msgstr "argument 'x' nie jest liczbą" # cluster/R/plotpart.R: 174 # stop("The clustering vector is of incorrect length") msgid "The clustering vector is of incorrect length" msgstr "Wektor grupujący posiada niepoprawną długość" # cluster/R/plotpart.R: 177 # stop("NA values are not allowed in clustering vector") msgid "NA-values are not allowed in clustering vector" msgstr "wartości NA są niedozwolone w wektorze grupującym" # cluster/R/plotpart.R: 303 # warning("Error in Fortran routine for the spanning ellipsoid,\n rank problem??") msgid "" "Error in Fortran routine for the spanning ellipsoid,\n" " rank problem??" msgstr "" "Błąd w procedurze Fortran dla elipsoidy obejmującej,\n" " problem rang?" 
# cluster/R/plotpart.R: 353 # stop("'col.clus' argument should have length 4 when color is TRUE") msgid "'col.clus' should have length 4 when color is TRUE" msgstr "" "argument 'col.clus' powinien mieć długość 4, gdy 'color' ma wartość TRUE" # cluster/R/plotpart.R: 508 # stop(gettextf("no diss nor data found, nor the original argument of %s", deparse(x$call))) msgid "no diss nor data found, nor the original argument of %s" msgstr "nie znaleziono różnic ani danych, ani oryginalnego argumentu %s" # cluster/R/plotpart.R: 514 # stop("no diss nor data found for 'clusplot()' function") msgid "no diss nor data found for clusplot()'" msgstr "nie znaleziono różnic ani danych dla funkcji 'clusplot()'" # cluster/R/silhouette.R: 7 # stop("invalid partition object") msgid "invalid partition object" msgstr "niepoprawny obiekt podziału" # cluster/R/silhouette.R: 21 # stop("full silhouette is only available for results of 'clara(*, keep.data = TRUE)'") msgid "" "full silhouette is only available for results of 'clara(*, keep.data = TRUE)'" msgstr "" "pełna sylwetka jest dostępna jedynie dla wyników 'clara(*, keep.data = TRUE)'" # cluster/R/silhouette.R: 35 # stop("'x' must only have integer codes") # cluster/R/silhouette.R: 82 # stop("'x' must only have integer codes") msgid "'x' must only have integer codes" msgstr "'x' musi posiadać tylko kody będące liczbami całkowitymi" # cluster/R/silhouette.R: 42 # stop("Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'") # cluster/R/silhouette.R: 94 # stop("Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'") msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'" msgstr "Potrzeba albo różnic 'dist' lub diss.matrix 'dmatrix'" # cluster/R/silhouette.R: 44 # stop("'dmatrix' is not a dissimilarity matrix compatible to 'x'") # cluster/R/silhouette.R: 96 # stop("'dmatrix' is not a dissimilarity matrix compatible to 'x'") msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'" msgstr "'dmatrix' nie jest 
macierzą różnic kompatybilną z 'x'" # cluster/R/silhouette.R: 48 # stop("clustering 'x' and dissimilarity 'dist' are incompatible") # cluster/R/silhouette.R: 100 # stop("clustering 'x' and dissimilarity 'dist' are incompatible") msgid "clustering 'x' and dissimilarity 'dist' are incompatible" msgstr "grupowane 'x' oraz różnice 'dist' nie są kompatybilne" # cluster/R/silhouette.R: 134 # stop("invalid silhouette structure") msgid "invalid silhouette structure" msgstr "niepoprana struktura 'silhouette'" # cluster/R/silhouette.R: 158 # stop("invalid 'silhouette' object") msgid "invalid 'silhouette' object" msgstr "niepoprawny obiekt 'silhouette'" # cluster/R/silhouette.R: 210 # stop("No valid silhouette information (#{clusters} =? 1)") msgid "No valid silhouette information (#{clusters} =? 1)" msgstr "Brak poprawnej informacji o sylwetce (czy liczba grup =? 1)" # cluster/R/clara.R: 91 # stop(sprintf(ngettext(nNA, "Observation %s has *only* NAs --> omit it for clustering", "Observations %s have *only* NAs --> omit them for clustering!", domain = "R-cluster"), pasteC(i)), domain = NA) msgid "Observation %s has *only* NAs --> omit it for clustering" msgid_plural "Observations %s have *only* NAs --> omit them for clustering!" msgstr[0] "" "Obserwacja %s posiada *tylko* wartości NA --> pomijanie jej w grupowaniu" msgstr[1] "" "Obserwacje %s posiadają *tylko* wartości NA --> pomijanie ich w grupowaniu" msgstr[2] "" "Obserwacje %s posiadają *tylko* wartości NA --> pomijanie ich w grupowaniu" # cluster/R/clara.R: 93 # stop(sprintf(ngettext(nNA, "%d observation (%s) has *only* NAs --> omit them for clustering!", "%d observations (%s ...) have *only* NAs --> omit them for clustering!", domain = "R-cluster"), nNA, pasteC(i[1:12])), domain = NA) msgid "%d observation (%s) has *only* NAs --> omit them for clustering!" msgid_plural "" "%d observations (%s ...) have *only* NAs --> omit them for clustering!" 
msgstr[0] "" "%d obserwacja (%s) posiada *tylko* wartości NA --> pomijanie jej w grupowaniu" msgstr[1] "" "%d obserwacje (%s ...) posiadają *tylko* wartości NA --> pomijanie ich w " "grupowaniu" msgstr[2] "" "%d obserwacji (%s ...) posiadają *tylko* wartości NA --> pomijanie ich w " "grupowaniu" # cluster/R/daisy.R: 76 # warning(sprintf(ngettext(sum(ilog), # "setting 'logical' variable %s to type 'asymm'", # "setting 'logical' variables %s to type 'asymm'", domain = "R-cluster"), # pColl(which(ilog))), domain = NA) msgid "setting 'logical' variable %s to type 'asymm'" msgid_plural "setting 'logical' variables %s to type 'asymm'" msgstr[0] "ustawianie zmiennej 'logical' %s na tym 'asymm'" msgstr[1] "ustawianie zmiennych 'logical' %s na tym 'asymm'" msgstr[2] "ustawianie zmiennych 'logical' %s na tym 'asymm'" #~ msgid "NAdiss" #~ msgstr "NAdiss" #~ msgid "non.diss" #~ msgstr "non.diss" #~ msgid "no distance can be computed." #~ msgstr "żadna odległość nie może zostać obliczona." #~ msgid "For each of the" #~ msgstr "Dla każdej z" #~ msgid "" #~ "samples, at least one object was found which\n" #~ " could not" #~ msgstr "próbek, co najmniej jeden obiekt został znaleziony, który nie mógł" #~ msgid "be assigned to a cluster (because of missing values)." #~ msgstr "być przypisany do grupy (z powodu brakujących wartości)." 
#~ msgid "invalid" #~ msgstr "niepoprawny argument" #~ msgid "type" #~ msgstr "type" #~ msgid "type$" #~ msgstr "type$" #~ msgid "binary variable(s)" #~ msgstr "zmienne binarne" #~ msgid "x" #~ msgstr "x" #~ msgid "has constant columns" #~ msgstr "posiada stałe kolumny" #~ msgid "invalid type" #~ msgstr "niepoprawny typ" # cluster/R/ellipsoidhull.R: 42 # warning(gettextf("algorithm possibly not converged in %d iterations", maxit)) #~ msgid "possibly not converged in" #~ msgstr "algorytm prawdopodobnie nie uzbieżnił się w" # cluster/man/plot.mona.Rd: 9 # gettext("Separation step", domain = "R-cluster") # cluster/R/plothier.R: 199 # gettext("Separation step", domain = "R-cluster") #~ msgid "iterations" #~ msgstr "iteracjach" # cluster/R/pam.R: 64 # stop(gettextf("'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d", k, n)) #~ msgid "'medoids' must be NULL or vector of" #~ msgstr "'medoids' musi być wartością NULL lub wektorem" #~ msgid "rank problem??" #~ msgstr "problem rang?" 
#~ msgid "'clara(*, keep.data = TRUE)'" #~ msgstr "'clara(*, keep.data = TRUE)'" # cluster/R/agnes.R: 135 # gettext("Call: ", domain = "R-cluster") # cluster/R/clara.R: 141 # gettext("Call: ", domain = "R-cluster") #~ msgid "Call:" #~ msgstr "Wywołanie:" # cluster/R/agnes.R: 136 # gettext("Agglomerative coefficient: ", domain = "R-cluster") # cluster/R/agnes.R: 149 # gettext("Agglomerative coefficient: ", domain = "R-cluster") #~ msgid "Agglomerative coefficient:" #~ msgstr "Współczynnik aglomeracyjny:" # cluster/R/agnes.R: 137 # gettext("Order of objects:", domain = "R-cluster") # cluster/R/agnes.R: 150 # gettext("Order of objects:", domain = "R-cluster") # cluster/R/mona.R: 75 # gettext("Order of objects:", domain = "R-cluster") # cluster/R/diana.R: 115 # gettext("Order of objects:", domain = "R-cluster") # cluster/R/diana.R: 136 # gettext("Order of objects:", domain = "R-cluster") #~ msgid "Order of objects:" #~ msgstr "Kolejność (rząd) obiektów:" # cluster/R/agnes.R: 140 # gettext("Height (summary):", domain = "R-cluster") #~ msgid "Height (summary):" #~ msgstr "Wysokość (podsumowanie):" # cluster/R/agnes.R: 141 # gettext("Available components:", domain = "R-cluster") # cluster/R/agnes.R: 158 # gettext("Available components:", domain = "R-cluster") # cluster/R/mona.R: 82 # gettext("Available components:", domain = "R-cluster") # cluster/R/clara.R: 147 # gettext("Available components:", domain = "R-cluster") # cluster/R/clara.R: 179 # gettext("Available components:", domain = "R-cluster") # cluster/R/diana.R: 122 # gettext("Available components:", domain = "R-cluster") # cluster/R/diana.R: 143 # gettext("Available components:", domain = "R-cluster") # cluster/R/pam.R: 183 # gettext("Available components:", domain = "R-cluster") # cluster/R/pam.R: 213 # gettext("Available components:", domain = "R-cluster") # cluster/R/fanny.R: 189 # gettext("Available components:", domain = "R-cluster") # cluster/R/fanny.R: 214 # gettext("Available components:", domain = 
"R-cluster") #~ msgid "Available components:" #~ msgstr "Dostępne komponenty:" # cluster/R/agnes.R: 148 # gettext("Object of class 'agnes' from call:", domain = "R-cluster") #~ msgid "Object of class 'agnes' from call:" #~ msgstr "Obiekt klasy \"agnes\" z wywołania:" # cluster/R/agnes.R: 153 # gettext("Merge:", domain = "R-cluster") # cluster/R/diana.R: 113 # gettext("Merge:", domain = "R-cluster") # cluster/R/diana.R: 135 # gettext("Merge:", domain = "R-cluster") #~ msgid "Merge:" #~ msgstr "Złączenie:" # cluster/R/agnes.R: 154 # gettext("Height:", domain = "R-cluster") # cluster/R/diana.R: 118 # gettext("Height:", domain = "R-cluster") # cluster/R/diana.R: 138 # gettext("Height:", domain = "R-cluster") #~ msgid "Height:" #~ msgstr "Wysokość:" # cluster/R/clara.R: 48 # gettextf("calling .C(cl_clara, ..., DUP = %s):", doDUP, domain = "R-cluster") #~ msgid "calling .C(cl_clara, ..., DUP = %s):" #~ msgstr "wywoływanie .C(cl_clara, ..., DUP = %s):" # cluster/R/clara.R: 142 # gettext("Medoids:", domain = "R-cluster") # cluster/R/clara.R: 160 # gettext("Medoids:", domain = "R-cluster") # cluster/R/pam.R: 175 # gettext("Medoids:", domain = "R-cluster") #~ msgid "Medoids:" #~ msgstr "Medoidy:" # cluster/R/clara.R: 143 # gettext("Objective function:", domain = "R-cluster") # cluster/R/clara.R: 161 # gettext("Objective function:", domain = "R-cluster") # cluster/R/pam.R: 177 # gettext("Objective function:", domain = "R-cluster") #~ msgid "Objective function:" #~ msgstr "Funkcja celu:" # cluster/R/clara.R: 144 # gettext("Clustering vector:", domain = "R-cluster") # cluster/R/clara.R: 171 # gettext("Clustering vector:", domain = "R-cluster") # cluster/R/pam.R: 176 # gettext("Clustering vector:", domain = "R-cluster") #~ msgid "Clustering vector:" #~ msgstr "Wektor grupujący:" # cluster/R/clara.R: 145 # gettext("Cluster sizes:", domain = "R-cluster") #~ msgid "Cluster sizes:" #~ msgstr "Rozmiary grup:" # cluster/R/clara.R: 146 # gettext("Best sample:", domain = "R-cluster") # 
cluster/R/clara.R: 170 # gettext("Best sample:", domain = "R-cluster") #~ msgid "Best sample:" #~ msgstr "Najlepsza próbka:" # cluster/R/clara.R: 159 # gettext("Object of class 'clara' from call:", domain = "R-cluster") #~ msgid "Object of class 'clara' from call:" #~ msgstr "Obiekt klasy \"clara\" z wywołania:" # cluster/R/clara.R: 162 # gettext("Numerical information per cluster:", domain = "R-cluster") # cluster/R/pam.R: 197 # gettext("Numerical information per cluster:", domain = "R-cluster") #~ msgid "Numerical information per cluster:" #~ msgstr "Numeryczna informacja na grupę:" # cluster/R/clara.R: 165 # gettext("Average silhouette width per cluster:", domain = "R-cluster") # cluster/R/pam.R: 205 # gettext("Average silhouette width per cluster:", domain = "R-cluster") # cluster/R/fanny.R: 206 # gettext("Average silhouette width per cluster:", domain = "R-cluster") #~ msgid "Average silhouette width per cluster:" #~ msgstr "Przeciętna szerokość sylwetki na grupę:" # cluster/R/clara.R: 167 # gettext("Average silhouette width of best sample: ", domain = "R-cluster") #~ msgid "Average silhouette width of best sample:" #~ msgstr "Przeciętna szerokość sylwetki dla najlepszej próbki:" # cluster/R/clara.R: 173 # gettext("Silhouette plot information for best sample:", domain = "R-cluster") #~ msgid "Silhouette plot information for best sample:" #~ msgstr "Informacja o wykresie sylwetki dla najlepszej próbki:" # cluster/R/clusGap.R: 33 # gettextf("Clustering k = 1,2,..., K.max (= %d): .. ", K.max, domain = "R-cluster") #~ msgid "Clustering k = 1,2,..., K.max (= %d): .." #~ msgstr "Grupowanie k = 1,2,..., K.max (= %d): .." 
# cluster/R/clusGap.R: 36 # gettext("done", domain = "R-cluster") #~ msgid "done" #~ msgstr "wykonano" # cluster/R/clusGap.R: 46 # gettextf("Bootstrapping, b = 1,2,..., B (= %d) [one \".\" per sample]:", B, domain = "R-cluster") #~ msgid "Bootstrapping, b = 1,2,..., B (= %d) [one \".\" per sample]:" #~ msgstr "Bootstrapowanie, b = 1,2,..., B (= %d) [jeden \".\" na próbkę]:" # cluster/R/clusGap.R: 127 # gettext("Clustering Gap statistic [\"clusGap\"].", domain = "R-cluster") #~ msgid "Clustering Gap statistic [\"clusGap\"]." #~ msgstr "Statystyka przerwy grupowania [\"clusGap\"]." # cluster/R/clusGap.R: 128 # gettextf("B=%d simulated reference sets, k = 1..%d", x$B, K, domain = "R-cluster") #~ msgid "B=%d simulated reference sets, k = 1..%d" #~ msgstr "B=%d symulowane zbiory referencyjne, k = 1..%d" # cluster/R/clusGap.R: 132 # gettextf(" --> Number of clusters (method '%s', SE.factor=%g): %d", method, SE.factor, nc, domain = "R-cluster") #~ msgid "--> Number of clusters (method '%s', SE.factor=%g): %d" #~ msgstr "--> Liczba grup (metoda '%s', SE.factor=%g): %d" # cluster/R/clusGap.R: 134 # gettextf(" --> Number of clusters (method '%s'): %d", method, nc, domain = "R-cluster") #~ msgid "--> Number of clusters (method '%s'): %d" #~ msgstr "--> Liczba grup (metoda '%s'): %d" # cluster/R/daisy.R: 157 # gettext("NA values in the dissimilarity matrix!") #~ msgid "NA values in the dissimilarity matrix!" #~ msgstr "Wartości NA w macierzy odmienności!" 
# cluster/R/daisy.R: 171 # gettext("Dissimilarities:", domain = "R-cluster") #~ msgid "Dissimilarities:" #~ msgstr "Odmienności:" # cluster/R/daisy.R: 178 # gettext("Warning: ", domain = "R-cluster") # cluster/R/daisy.R: 208 # gettext("Warning: ", domain = "R-cluster") #~ msgid "Warning:" #~ msgstr "Ostrzeżenie:" # cluster/R/daisy.R: 179 # gettext("Metric: ", domain = "R-cluster") # cluster/R/daisy.R: 202 # gettext("Metric: ", domain = "R-cluster") #~ msgid "Metric:" #~ msgstr "Metryka:" # cluster/R/daisy.R: 181 # gettextf("Types = %s", paste(aT, collapse = ", "), domain = "R-cluster") # cluster/R/daisy.R: 204 # gettextf("Types = %s", paste(aT, collapse = ", "), domain = "R-cluster") #~ msgid "Types = %s" #~ msgstr "Typy = %s" # cluster/R/daisy.R: 183 # gettext("Number of objects:", domain = "R-cluster") # cluster/R/daisy.R: 206 # gettext("Number of objects:", domain = "R-cluster") #~ msgid "Number of objects:" #~ msgstr "Liczba obiektów:" # cluster/R/diana.R: 120 # gettext("Divisive coefficient:", domain = "R-cluster") # cluster/R/diana.R: 139 # gettext("Divisive coefficient:", domain = "R-cluster") #~ msgid "Divisive coefficient:" #~ msgstr "Współczynnik podziału:" # cluster/R/ellipsoidhull.R: 37 # gettext("Error in Fortran routine computing the spanning ellipsoid. Probably collinear data", domain = "R-cluster") #~ msgid "" #~ "Error in Fortran routine computing the spanning ellipsoid. 
Probably " #~ "collinear data" #~ msgstr "" #~ "Błąd w procedurze Fortran dla elipsoidy obejmującej, prawdopodobnie " #~ "współliniowe dane" #~ msgid "" #~ "ellipsoid in %d dimensions:\n" #~ " center = (%s); squared ave.radius d^2 = %s\n" #~ " and shape matrix =" #~ msgstr "" #~ "elipsoida w %d wymiarach:\n" #~ " centrum = (%s); kwadrat przeciętnego promienia d^2 = %s\n" #~ " oraz macierz kształtu =" # cluster/R/ellipsoidhull.R: 69 # gettextf(" ellipsoid's area = %s", format(volume(x), digits=digits), domain = "R-cluster") #~ msgid "ellipsoid's area = %s" #~ msgstr "powierzchnia elipsoidy = %s" # cluster/R/ellipsoidhull.R: 70 # gettextf(" ellipsoid's volume = %s", format(volume(x), digits=digits), domain = "R-cluster") #~ msgid "ellipsoid's volume = %s" #~ msgstr "objętość elipsoidy = %s" # cluster/R/ellipsoidhull.R: 73 # gettext("** Warning: ** the algorithm did not terminate reliably!\n most probably because of collinear data", domain = "R-cluster") #~ msgid "" #~ "** Warning: ** the algorithm did not terminate reliably!\n" #~ " most probably because of collinear data" #~ msgstr "" #~ "** Ostrzeżenie: ** algorytm nie zakończył się w sposób wiarygodny!\n" #~ " prawdopodobnie z powodu wspóliniowych danych" # cluster/R/ellipsoidhull.R: 75 # gettext("** Warning: ** the algorithm did not terminate reliably!\n (in the available number of iterations)", domain = "R-cluster") #~ msgid "" #~ "** Warning: ** the algorithm did not terminate reliably!\n" #~ " (in the available number of iterations)" #~ msgstr "" #~ "** Ostrzeżenie: ** algorytm nie zakończył się w sposób wiarygodny!\n" #~ " (w dostępnej liczbie iteracji)" # cluster/R/fanny.R: 172 # gettext("Fuzzy Clustering object of class 'fanny': ", domain = "R-cluster") #~ msgid "Fuzzy Clustering object of class 'fanny':" #~ msgstr "Obiekt rozmytego grupowania klasy \"fanny\":" # cluster/R/fanny.R: 179 # gettext("Membership coefficients (in percent, rounded):", domain = "R-cluster") #~ msgid "Membership coefficients (in 
percent, rounded):" #~ msgstr "Współczynnik członkostwa (w procentach, zaokrąglony):" # cluster/R/fanny.R: 180 # gettext("Fuzzyness coefficients:", domain = "R-cluster") #~ msgid "Fuzzyness coefficients:" #~ msgstr "Współczynniki rozmycia:" # cluster/R/fanny.R: 181 # gettext("Closest hard clustering:", domain = "R-cluster") #~ msgid "Closest hard clustering:" #~ msgstr "Najbliższe twarde grupowanie:" # cluster/R/fanny.R: 183 # gettextf("k_crisp (= %d) < k !!", x$k.crisp, domain = "R-cluster") #~ msgid "k_crisp (= %d) < k !!" #~ msgstr "k_crisp (= %d) < k !!" # cluster/R/pam.R: 203 # gettext("Silhouette plot information:", domain = "R-cluster") # cluster/R/fanny.R: 204 # gettext("Silhouette plot information:", domain = "R-cluster") #~ msgid "Silhouette plot information:" #~ msgstr "Informacje o wykresie sylwetek:" # cluster/R/pam.R: 207 # gettext("Average silhouette width of total data set:", domain = "R-cluster") # cluster/R/fanny.R: 208 # gettext("Average silhouette width of total data set:", domain = "R-cluster") #~ msgid "Average silhouette width of total data set:" #~ msgstr "Przeciętna szerokość sylwetki pełnego zbioru danych:" # cluster/R/mona.R: 42 # stop("No clustering performed, a variable was found with at least 50 percent missing values.") #~ msgid "" #~ "No clustering performed, a variable was found with at least 50 percent " #~ "missing values." #~ msgstr "" #~ "Nie wykonano grupowania, znaleziono zmienną z co najmniej 50 procent " #~ "brakujących wartości." 
# cluster/R/mona.R: 73 # gettext("Revised data:", domain = "R-cluster") #~ msgid "Revised data:" #~ msgstr "Przeglądnięte dane:" # cluster/R/mona.R: 78 # gettext("Variable used:", domain = "R-cluster") #~ msgid "Variable used:" #~ msgstr "Użyte zmienne:" # cluster/R/mona.R: 80 # gettext("Separation step:", domain = "R-cluster") #~ msgid "Separation step:" #~ msgstr "Krok separacji:" # cluster/R/pam.R: 198 # gettext("Isolated clusters:", domain = "R-cluster") #~ msgid "Isolated clusters:" #~ msgstr "Izolowane grupy:" #~ msgid "L-clusters:" #~ msgstr "L-grupy:" # cluster/R/pam.R: 200 # gettext(" L*-clusters: ", domain = "R-cluster") #~ msgid "L*-clusters:" #~ msgstr "L*-grupy:" # cluster/R/plothier.R: 6 # gettextf("Dendrogram of %s", paste(deparse(x$call), collapse = ""), domain = "R-cluster") # cluster/R/plothier.R: 98 # gettextf("Dendrogram of %s", cl, domain = "R-cluster") # cluster/R/plothier.R: 153 # gettextf("Dendrogram of %s", cl, domain = "R-cluster") #~ msgid "Dendrogram of %s" #~ msgstr "Dendrogram %s" # cluster/man/pltree.twins.Rd: 11 # gettext("Height", domain = "R-cluster") # cluster/man/bannerplot.Rd: 11 # gettext("Height", domain = "R-cluster") # cluster/R/plothier.R: 7 # gettext("Height", domain = "R-cluster") # cluster/R/plothier.R: 24 # gettext("Height", domain = "R-cluster") #~ msgid "Height" #~ msgstr "Wysokość" # cluster/R/plothier.R: 91 # gettextf("Agglomerative Coefficient = %s", round(x$ac, digits = 2), domain = "R-cluster") #~ msgid "Agglomerative Coefficient = %s" #~ msgstr "Współczynnik aglomeracyjny = %s" # cluster/R/plothier.R: 97 # gettextf("Banner of %s", cl, domain = "R-cluster") # cluster/R/plothier.R: 152 # gettextf("Banner of %s", cl, domain = "R-cluster") # cluster/R/plothier.R: 198 # gettextf("Banner of %s", deparse(x$call), domain = "R-cluster") #~ msgid "Banner of %s" #~ msgstr "Baner %s" # cluster/R/plothier.R: 113 # gettext("Make a plot selection (or 0 to exit):", domain = "R-cluster") # cluster/R/plothier.R: 168 # 
gettext("Make a plot selection (or 0 to exit):", domain = "R-cluster") # cluster/R/plotpart.R: 26 # gettext("Make a plot selection (or 0 to exit):", domain = "R-cluster") #~ msgid "Make a plot selection (or 0 to exit):" #~ msgstr "Wybierz wykres (lub 0 aby wyjść):" # cluster/R/plothier.R: 146 # gettextf("Divisive Coefficient = %s", round(x$dc, digits = 2), domain = "R-cluster") #~ msgid "Divisive Coefficient = %s" #~ msgstr "Współczynnik podziału = %s" # cluster/R/plotpart.R: 154 # gettextf("CLUSPLOT(%s)", deparse(substitute(x))) #~ msgid "CLUSPLOT(%s)" #~ msgstr "CLUSPLOT(%s)" # cluster/R/plotpart.R: 155 # gettextf("These two components explain %s percent of the point variability.", round(100 * var.dec, digits = 2)) #~ msgid "These two components explain %s percent of the point variability." #~ msgstr "Te dwa komponenty wyjaśniają %s procent zmienności punktu." # cluster/man/clusplot.default.Rd: 23 # gettext("Component 1", domain = "R-cluster") # cluster/R/plotpart.R: 156 # gettext("Component 1", domain = "R-cluster") #~ msgid "Component 1" #~ msgstr "Komponent 1" #~ msgid "Component 2" #~ msgstr "Komponent 2" # cluster/R/plotpart.R: 198 # gettextf("cluster %d has only one observation ..", i, domain = "R-cluster") #~ msgid "cluster %d has only one observation .." #~ msgstr "grupa %d ma tylko jedną obserwację .." # cluster/R/plotpart.R: 286 # gettext("span & rank2 : calling \"spannel\" ..", domain = "R-cluster") #~ msgid "span & rank2 : calling \"spannel\" .." #~ msgstr "span & rank2 : wywoływanie \"spannel\" .." 
# cluster/R/silhouette.R: 178 # gettextf("Silhouette of %d units in %d clusters from %s:", sum(csiz), k, deparse(x$call), domain = "R-cluster") # cluster/R/silhouette.R: 181 # gettextf("Silhouette of %d units in %d clusters from %s:", sum(csiz), k, deparse(x$call), domain = "R-cluster") #~ msgid "Silhouette of %d units in %d clusters from %s:" #~ msgstr "Sylwetka %d jednostek w %d klastrach z %s:" # cluster/R/silhouette.R: 179 # gettextf("Cluster sizes, ids = (%s), and average silhouette widths:", paste(x$codes, collapse=", "), domain = "R-cluster") # cluster/R/silhouette.R: 186 # gettextf("Cluster sizes, ids = (%s), and average silhouette widths:", paste(x$codes, collapse=", "), domain = "R-cluster") #~ msgid "Cluster sizes, ids = (%s), and average silhouette widths:" #~ msgstr "Rozmiary grup, ids = (%s), oraz przeciętne szerokości sylwetek:" # cluster/R/silhouette.R: 182 # gettextf("Cluster sizes and average silhouette widths:", domain = "R-cluster") # cluster/R/silhouette.R: 189 # gettext("Cluster sizes and average silhouette widths:", domain = "R-cluster") #~ msgid "Cluster sizes and average silhouette widths:" #~ msgstr "Rozmiary grup oraz przeciętne szerokości sylwetek:" # cluster/R/silhouette.R: 185 # gettextf("Silhouette of %d units in %d clusters:", sum(csiz), k, domain = "R-cluster") # cluster/R/silhouette.R: 188 # gettextf("Silhouette of %d units in %d clusters:", sum(csiz), k, domain = "R-cluster") #~ msgid "Silhouette of %d units in %d clusters:" #~ msgstr "Sylwetka %d jednostek w %d klastrach:" # cluster/R/silhouette.R: 194 # gettext("Individual silhouette widths:", domain = "R-cluster") #~ msgid "Individual silhouette widths:" #~ msgstr "Indywidualne szerokości sylwetki:" # cluster/R/silhouette.R: 220 # gettext("Silhouette plot", domain = "R-cluster") #~ msgid "Silhouette plot" #~ msgstr "Wykres sylwetki" # cluster/R/silhouette.R: 224 # gettextf("Silhouette plot of %s", sub("^FF","", deparse(cll)), domain = "R-cluster") #~ msgid "Silhouette plot of 
%s" #~ msgstr "Wykres sylwetki %s" # cluster/R/silhouette.R: 230 # gettext("Average silhouette width:", domain = "R-cluster") #~ msgid "Average silhouette width:" #~ msgstr "Przeciętna szerokość sylwetki:" # cluster/R/daisy.R: 200 # sprintf(ngettext(x$n, "%d dissimilarity, summarized:", "%d dissimilarities, summarized:", domain = "R-cluster"), x$n) #~ msgid "%d dissimilarity, summarized:" #~ msgid_plural "%d dissimilarities, summarized:" #~ msgstr[0] "%d odmienność, podsumowanie:" #~ msgstr[1] "%d odmienności, podsumowanie:" #~ msgstr[2] "%d odmienności, podsumowanie:" #~ msgid "" #~ "%d observations (%s ...)\n" #~ "\thave *only* NAs --> na.omit() them for clustering!" #~ msgstr "" #~ "%d obserwacji (%s ...)\n" #~ "\tmają *tylko* wartości NA --> pomijanie ich w grupowaniu" #~ msgid "hence, area = %s" #~ msgstr "tak więc powierzchnia = %s" #, fuzzy #~ msgid "R-cluster" #~ msgstr "L-grupy:" cluster/po/R-ko.po0000644000175100001440000003477412466225010013554 0ustar hornikusers# Korean translations for cluster package. # Recommended/cluster/po/R-ko.po # Maintainer: Martin Maechler # # This file is distributed under the same license as the R cluster package. # Chel Hee Lee , 2013-2015. # # Reviewing process is completed (15-JAN-2015). # The original source code is reviewed (26-JAN-2015). # QC: PASS # Freezing on 06-FEB-2015 for R-3.1.3 # msgid "" msgstr "" "Project-Id-Version: cluster 1.15.2\n" "POT-Creation-Date: 2015-01-30 12:14\n" "PO-Revision-Date: 2015-02-06 21:56-0600\n" "Last-Translator:Chel Hee Lee \n" "Language-Team: Chel Hee Lee \n" "Language: ko\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=1; plural=0;\n" msgid "invalid clustering method" msgstr "군집방법(clustering method)의 이름이 올바르지 않습니다." msgid "ambiguous clustering method" msgstr "불분명한 군집방법(clustering method)입니다." msgid "'par.method' must be of length 1, 3, or 4" msgstr "'par.method'의 길이는 반드시 1, 3, 또는 4이어야 합니다." 
msgid "NA-values in the dissimilarity matrix not allowed." msgstr "NA의 값은 비유사성 행렬(dissimilarity matrix)에 사용될 수 없습니다." msgid "'x' is not and cannot be converted to class \"dissimilarity\"" msgstr "'x'는 \"dissimilarity\"이라는 클래스가 아니거나 클래스 \"dissimilarity\"로 전환할 수 없습니다." msgid "x is not a numeric dataframe or matrix." msgstr "x는 수치형 데이터 프레임 또는 행렬이 아닙니다." msgid "need at least 2 objects to cluster" msgstr "군집(cluster)는 적어도 2개의 객체를 필요로 합니다." msgid "No clustering performed, NA-values in the dissimilarity matrix." msgstr "비유사성 행렬(dissimilarity matrix)에서 NA 값이 발견되었기 때문에 군집화 과정이 실행되지 않았습니다." msgid "'x' is a \"dist\" object, but should be a data matrix or frame" msgstr "'x'는 클래스 \"dist\"를 가지는 객체이지만, 데이터 행렬 또는 프레임이어야 합니다." msgid "The number of cluster should be at least 1 and at most n-1." msgstr "군집(cluster)의 개수는 적어도 1 이상이며 최대 n-1 이내에 있어야 합니다." msgid "'sampsize' should be at least %d = max(2, 1+ number of clusters)" msgstr "'sampsize'는 최소 %d = max(2, 1 + 군집의 개수)가 되어야 합니다." msgid "'sampsize' = %d should not be larger than the number of objects, %d" msgstr "'sampsize' = %1$d는 객체의 개수 %2$d보다 클 수 없습니다." msgid "'samples' should be at least 1" msgstr "'samples'는 적어도 1 이상 이어야 합니다." msgid "when 'medoids.x' is FALSE, 'keep.data' must be too" msgstr "'medoids.x'가 FALSE인 경우에는 'keep.data' 역시 FALSE이어야 합니다." msgid "Each of the random samples contains objects between which no distance can be computed." msgstr "각각의 무작위 표본은 서로간의 거리를 계산할 수 없는 객체들을 포함하고 있습니다." msgid "For each of the %d samples, at least one object was found which could not be assigned to a cluster (because of missing values)." msgstr "%d개의 표본 각각에 대해서 결측값으로 인하여 어느 군집에도 배정할 수 없는 객체를 적어도 하나 이상 발견하였습니다." msgid "invalid 'jstop' from .C(cl_clara,.):" msgstr ".C(cl_clara,.)으로부터 얻어진 'jstop'는 다음과 같은 이유로 이상합니다: " msgid "'B' has to be a positive integer" msgstr "'B'는 반드시 양의 정수이어야 합니다." msgid "invalid 'twins' object" msgstr "올바른 'twins' 객체가 아닙니다." msgid "x is not a dataframe or a numeric matrix." msgstr "x는 데이터 프레임이 아니거나 수치형 행렬이 아닙니다." 
msgid "invalid %s; must be named list" msgstr "사용할 수 있는 %s가 아닙니다. 반드시 구성요소에 이름이 부여된 리스트(named list)이여야 합니다." msgid "%s has invalid column names" msgstr "%s는 올바른 열이름을 가지고 있지 않습니다." msgid "%s must be in 1:ncol(x)" msgstr "%s는 반드시 1:ncol(x)내에 있어야 합니다." msgid "%s must contain column names or numbers" msgstr "%s는 반드시 열 이름 또는 번호를 포함해야 합니다." msgid "at least one binary variable has more than 2 levels." msgstr "적어도 하나 이상의 이항변수(binary variable)가 두 가지 이상의 수준(levels)을 가지고 있습니다." msgid "at least one binary variable has not 2 different levels." msgstr "적어도 하나 이상의 이항변수(binary variable)이 서로 다른 두 가지 수준을 가지고 있지 않습니다." msgid "at least one binary variable has values not in {0,1,NA}" msgstr "적어도 하나 이상의 이항변수(binary variable)이 {0,1,NA} 외의 값을 가지고 있습니다." msgid "binary variable(s) %s treated as interval scaled" msgstr "이항변수(binary variable) %s는 구간척도(interval scale)로서 다루어집니다. " msgid "%s has constant columns %s; these are standardized to 0" msgstr "%1$s는 상수(constant)값을 가지는 열 %2$s를 가집니다. 이들은 0으로 표준화(standardized)됩니다." msgid "with mixed variables, metric \"gower\" is used automatically" msgstr "혼합형 변수(mixed variables)를 이용할 때는 metric은 자동으로 \"gower\"가 사용됩니다." msgid "'weights' must be of length p (or 1)" msgstr "'weights'의 길이는 반드시 p (또는 1)이어야 합니다." msgid "invalid type %s for column numbers %s" msgstr "행번호 %2$s에 잘못된 유형(type) %1$s이 주어졌습니다." msgid "NA values in the dissimilarity matrix not allowed." msgstr "비유사성 행렬(dissimilarity matrix)는 NA 값을 가질 수 없습니다." msgid "No clustering performed, NA's in dissimilarity matrix." msgstr "비유사성 행렬(dissimilarity matrix)에 NA가 있기 때문에, 군집화 과정이 실행되지 않았습니다." msgid "'x' must be numeric n x p matrix" msgstr "'x'는 반드시 크기가 n x p인 수치형 행렬이어야 합니다." msgid "omitting NAs" msgstr "NA를 삭제합니다." msgid "no points without missing values" msgstr "결측값들을 제외하면 사용가능한 포인트들이 없습니다." msgid "computed some negative or all 0 probabilities" msgstr "확률값이 모두 0이거나 일부가 음수로 산출되었습니다." msgid "algorithm possibly not converged in %d iterations" msgstr "알고리즘의 %d번의 반복수행에도 수렴하지 않을 수 있습니다." 
msgid "'A' must be p x p cov-matrix defining an ellipsoid" msgstr "'A'는 반드시 타원(ellipsoid)를 정의하는 크기가 p x p인 공분산행렬(cov-matrix)이어야 합니다." msgid "ellipsoidPoints() not yet implemented for p >= 3 dim." msgstr "ellipsoidPoints()는 p >= 3 인경우에는 아직 구현되지 않았습니다." msgid "'k' (number of clusters) must be in {1,2, .., n/2 -1}" msgstr "'k' (군집의 개수)는 반드시 {1,2, .., n/2 -1} 내에 존재해야 합니다." msgid "'memb.exp' must be a finite number > 1" msgstr "'memb.exp'는 반드시 1보다 큰 유한한(finite) 숫자이어야 합니다." msgid "'maxit' must be non-negative integer" msgstr "'maxit'은 반드시 음이 아닌 정수이어야 합니다." msgid "'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1" msgstr "'iniMem.p'는 반드시 크기가 n * k 인 비음수 행렬(nonnegative matrix)이어야 하며, 이 행렬의 rowSums == 1 이어야 합니다." msgid "FANNY algorithm has not converged in 'maxit' = %d iterations" msgstr "FANNY 알고리즘은 'maxit' = %d번의 반복수행에도 수렴하지 않았습니다." msgid "the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?" msgstr "멤버쉽(membership) 전부가 1/k에 매우 가깝습니다. 아마도 'memb.exp'를 줄여보는 것은 어떨까요?" msgid "'m', a membership matrix, must be nonnegative with rowSums == 1" msgstr "멤버쉽 행렬(membership matrix) 'm'은 반드시 음수를 가지지 않으며 rowSums == 1이어야 합니다." msgid "'n' must be >= 2" msgstr "'n'는 반드시 2보다 크거나 같아야 합니다." msgid "x must be a matrix or data frame." msgstr "x는 반드시 행렬 또는 데이터 프레임이어야 합니다." msgid "All variables must be binary (e.g., factor with 2 levels)." msgstr "모든 변수들은 반드시 2개의 수준(levels)으로 이루어진 요인(factor)이어야 합니다." msgid "No clustering performed, an object was found with all values missing." msgstr "모든 값이 결측된 객체가 발견되어 군집화 과정이 수행되지 않았습니다." msgid "No clustering performed, found variable with more than half values missing." msgstr "절반 이상의 값들이 결측된 변수가 발견되어 군집화 과정이 수행되지 않았습니다." msgid "No clustering performed, a variable was found with all non missing values identical." msgstr "결측되지 않은 모든 값들이 동일한 변수가 발견되어 군집화 과정이 수행되지 않았습니다." msgid "No clustering performed, all variables have at least one missing value." msgstr "모든 변수들이 적어도 하나 이상의 결측값을 가지기 때문에 군집화 과정이 수행되지 않았습니다." 
msgid "Cannot keep data when 'x' is a dissimilarity!" msgstr "" msgid "Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2" msgstr "군집(clusters)의 개수 'k'는 반드시 {1,2, .., n-1}내에 존재해야 하므로 n >= 2 입니다." msgid "'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d" msgstr "'medoids'는 반드시 NULL 또는 {1,2, .., n}으로부터 %1$d개의 구분되는 인덱스를 가진 벡터입니다 (n=%2$d). " msgid "No clustering performed, NAs in the computed dissimilarity matrix." msgstr "계산된 비유사성 행렬(dissimilarity matrix) 내에 NA가 존재하여 군집화 과정이 수행되지 않았습니다." msgid "error from .C(cl_pam, *): invalid medID's" msgstr ".C(cl_pam, *)으로부터 에러가 발생했습니다: medID가 올바르지 않습니다." msgid "NA-values are not allowed in dist-like 'x'." msgstr "'x'는 NA를 가질 수 없습니다." msgid "Distances must be result of dist or a square matrix." msgstr "거리(distances)는 반드시 dist 또는 정방행렬(square matrix)의 결과이어야 합니다." msgid "the square matrix is not symmetric." msgstr "대칭(symmetric)적인 정방행렬이 아닙니다." msgid ">>>>> funny case in clusplot.default() -- please report!" msgstr ">>>>> clusplot.default()에서 예상치 못한 경우가 발생했습니다 -- 보고해 주시길 부탁드립니다!" msgid "x is not a data matrix" msgstr "x는 데이터 행렬(data matrix)이 아닙니다." msgid "one or more objects contain only missing values" msgstr "하나 또는 그 이상의 객체들이 오로지 결측값만을 포함하고 있습니다." msgid "one or more variables contain only missing values" msgstr "하나 또는 그 이상의 변수들이 오로지 결측값만을 포함하고 있습니다." msgid "Missing values were displaced by the median of the corresponding variable(s)" msgstr "결측값들은 대응변수(들)의 중앙값으로 대체되었습니다." msgid "x is not numeric" msgstr "x는 수치형(numeric)이 아닙니다." msgid "The clustering vector is of incorrect length" msgstr "군집벡터(clustering vector)의 길이가 올바르지 않습니다." msgid "NA-values are not allowed in clustering vector" msgstr "군집벡터(clustering vector)에서는 NA가 허용되지 않습니다." msgid "" "Error in Fortran routine for the spanning ellipsoid,\n" " rank problem??" msgstr "" "스패닝 타원(spanning ellipsoid)를 생성하는 포트란 루틴(Fortran routine)에서 에러가 발생했습니다. \n" " 위수(rank) 문제인가요??" 
msgid "'col.clus' should have length 4 when color is TRUE" msgstr "color가 TRUE일 때, 'col.clus'의 길이는 반드시 4이어야 합니다." msgid "no diss nor data found, nor the original argument of %s" msgstr "diss와 data 모두 찾을 수 없을 뿐만 아니라 원래의 인자 %s 또한 찾을 수 없습니다." msgid "no diss nor data found for clusplot()'" msgstr "clusplot()에 사용될 diss와 data 모두 찾을 수 없습니다." msgid "invalid partition object" msgstr "partition 객체가 유효하지 않습니다." msgid "full silhouette is only available for results of 'clara(*, keep.data = TRUE)'" msgstr "full silhouette는 'clara(*, keep.data = TRUE)'의 결과만에 오로지 사용할 수 있습니다." msgid "'x' must only have integer codes" msgstr "'x'는 오로지 정수형 코드(codes)만을 가질 수 있습니다." msgid "Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'" msgstr "dissimilarity 'dist' 또는 diss.matrix 'dmatrix' 둘 중에 하나가 필요합니다." msgid "'dmatrix' is not a dissimilarity matrix compatible to 'x'" msgstr "'dmatrix'는 'x'에 부합하는 (또는 사용할 수 있는) 비유사성 행렬(dissimilarity matrix)이 아닙니다." msgid "clustering 'x' and dissimilarity 'dist' are incompatible" msgstr "'x'와 'dist'가 서로 부합하지 않습니다." msgid "invalid silhouette structure" msgstr "silhouette 구조가 올바르지 않습니다." msgid "invalid 'silhouette' object" msgstr "'silhouette' 객체가 올바르지 않습니다." msgid "No valid silhouette information (#{clusters} =? 1)" msgstr "유효한 silhouette 정보가 없습니다 (#{clusters} =? 1)" msgid "Observation %s has *only* NAs --> omit it for clustering" msgid_plural "Observations %s have *only* NAs --> omit them for clustering!" msgstr[0] "관측값 %s는 *오로지* NA만을 가집니다 --> 군집화를 위하여 이것들을 제거합니다!" msgid "%d observation (%s) has *only* NAs --> omit them for clustering!" msgid_plural "%d observations (%s ...) have *only* NAs --> omit them for clustering!" msgstr[0] "%d개의 관측값들이 (%s) *오로지* NA만을 가집니다 --> 군집화를 위하여 이들을 제거합니다!" msgid "setting 'logical' variable %s to type 'asymm'" msgid_plural "setting 'logical' variables %s to type 'asymm'" msgstr[0] "'logical' 변수 %s를 유형(type) 'asymm'으로 설정합니다." 
cluster/inst/0000755000175100001440000000000012553131037012726 5ustar hornikuserscluster/inst/po/0000755000175100001440000000000012553131037013344 5ustar hornikuserscluster/inst/po/pl/0000755000175100001440000000000012553131037013757 5ustar hornikuserscluster/inst/po/pl/LC_MESSAGES/0000755000175100001440000000000012553131037015544 5ustar hornikuserscluster/inst/po/pl/LC_MESSAGES/R-cluster.mo0000644000175100001440000003035612461440665020000 0ustar hornikusersW7Ie'}3 29- ?g 5 ? $ MB &  )  C @U $ < ; !4 V 8w : 4 V Dw <  Ly2.+(2T;6?B:T}GEK`2>v,;104L8708#-\5)M8$W|%"%&H7o  /1\7F#2:2m)'!  B#^#)C+N@9>D?M0f,%R-n)D@ ,L Fy V '!<?!K|!A!B "jM"F"="=#O#3"$4V$7$3$7$9/%:i%D%Y%ZC&T&T&;H'N''1(@(<*)!g)B)E)D*CW*8*K*M +-n+P+)+',?,_,z,,!,&,.,;--Ai-(--C-@/.p.F/(I/Cr/C/&/@!09b0030E@$IS5?L!J &-)  > /"QW.'(;4R7C,TB09=P2GUNM3D8 1#:*F<O%+HKVA6%d observation (%s) has *only* NAs --> omit them for clustering!%d observations (%s ...) have *only* NAs --> omit them for clustering!%s has constant columns %s; these are standardized to 0%s has invalid column names%s must be in 1:ncol(x)%s must contain column names or numbers'A' must be p x p cov-matrix defining an ellipsoid'B' has to be a positive integer'col.clus' should have length 4 when color is TRUE'dmatrix' is not a dissimilarity matrix compatible to 'x''iniMem.p' must be a nonnegative n * k matrix with rowSums == 1'k' (number of clusters) must be in {1,2, .., n/2 -1}'m', a membership matrix, must be nonnegative with rowSums == 1'maxit' must be non-negative integer'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d'memb.exp' must be a finite number > 1'n' must be >= 2'par.method' must be of length 1, 3, or 4'samples' should be at least 1'sampsize' = %d should not be larger than the number of objects, %d'sampsize' should be at least %d = max(2, 1+ number of clusters)'weights' must be of length p (or 1)'x' is a "dist" object, but should be a data matrix or frame'x' is not and cannot be converted to class "dissimilarity"'x' 
must be numeric n x p matrix'x' must only have integer codes>>>>> funny case in clusplot.default() -- please report!All variables must be binary (e.g., factor with 2 levels).Distances must be result of dist or a square matrix.Each of the random samples contains objects between which no distance can be computed.Error in Fortran routine for the spanning ellipsoid, rank problem??FANNY algorithm has not converged in 'maxit' = %d iterationsFor each of the %d samples, at least one object was found which could not be assigned to a cluster (because of missing values).Missing values were displaced by the median of the corresponding variable(s)NA values in the dissimilarity matrix not allowed.NA-values are not allowed in clustering vectorNA-values are not allowed in dist-like 'x'.NA-values in the dissimilarity matrix not allowed.Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'No clustering performed, NA's in dissimilarity matrix.No clustering performed, NA-values in the dissimilarity matrix.No clustering performed, NAs in the computed dissimilarity matrix.No clustering performed, a variable was found with all non missing values identical.No clustering performed, all variables have at least one missing value.No clustering performed, an object was found with all values missing.No clustering performed, found variable with more than half values missing.No valid silhouette information (#{clusters} =? 
1)Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2Observation %s has *only* NAs --> omit it for clusteringObservations %s have *only* NAs --> omit them for clustering!The clustering vector is of incorrect lengthThe number of cluster should be at least 1 and at most n-1.algorithm possibly not converged in %d iterationsambiguous clustering methodat least one binary variable has more than 2 levels.at least one binary variable has not 2 different levels.at least one binary variable has values not in {0,1,NA}binary variable(s) %s treated as interval scaledclustering 'x' and dissimilarity 'dist' are incompatiblecomputed some negative or all 0 probabilitiesellipsoidPoints() not yet implemented for p >= 3 dim.error from .C(cl_pam, *): invalid medID'sfull silhouette is only available for results of 'clara(*, keep.data = TRUE)'invalid %s; must be named listinvalid 'jstop' from .C(cl_clara,.):invalid 'silhouette' objectinvalid 'twins' objectinvalid clustering methodinvalid partition objectinvalid silhouette structureinvalid type %s for column numbers %sneed at least 2 objects to clusterno diss nor data found for clusplot()'no diss nor data found, nor the original argument of %sno points without missing valuesomitting NAsone or more objects contain only missing valuesone or more variables contain only missing valuessetting 'logical' variable %s to type 'asymm'setting 'logical' variables %s to type 'asymm'the memberships are all very close to 1/k. 
Maybe decrease 'memb.exp' ?the square matrix is not symmetric.when 'medoids.x' is FALSE, 'keep.data' must be toowith mixed variables, metric "gower" is used automaticallyx is not a data matrixx is not a dataframe or a numeric matrix.x is not a numeric dataframe or matrix.x is not numericx must be a matrix or data frame.Project-Id-Version: cluster 1.15.1 Report-Msgid-Bugs-To: bugs.r-project.org POT-Creation-Date: 2015-01-26 14:31 PO-Revision-Date: 2014-03-27 17:34+0100 Last-Translator: Łukasz Daniel Language-Team: Łukasz Daniel Language: pl_PL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plural-Forms: nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2); X-Poedit-SourceCharset: iso-8859-1 X-Generator: Poedit 1.5.4 %d obserwacja (%s) posiada *tylko* wartości NA --> pomijanie jej w grupowaniu%d obserwacje (%s ...) posiadają *tylko* wartości NA --> pomijanie ich w grupowaniu%d obserwacji (%s ...) posiadają *tylko* wartości NA --> pomijanie ich w grupowaniu%s posiada stałe kolumny %s; zostały one ustandaryzowane do zera%s posiada niepoprawne nazwy kolumn%s musi być w przedziale 1:ncol(x)%s musi zawierać nazwy kolumn lub liczby'A' musi być macierzą kowariancji p x p określającą elipsoidę'B' musi być dodatnią liczbą całkowitąargument 'col.clus' powinien mieć długość 4, gdy 'color' ma wartość TRUE'dmatrix' nie jest macierzą różnic kompatybilną z 'x''iniMem.p' musi być nieujemną maceirzą n x k z rowSums == 1'k' (liczba grup) musi mieścić się w przedziale {1,2, .., n/2 -1}macierz przynależności 'm' musi być nieujemna z rowSums == 1'maxit' musi być nieujemną liczbą całkowitąargument 'medoids' musi być wartością NULL lub wektorem %d różnych indeksów w {1,2, .., n}, n=%d'memb.exp' musi być skończoną liczbą > 1argument 'n' musi być >= 2'par.method' musi być długości 1, 3, lub 4'samples' powinno wynosić przynajmniej 1'sampsize' = %d nie powinien być większy niż liczba obiektów, %d'sampsize' powinien być 
co najmniej %d = max(2, 1+ liczba grup)'weights' musi być o długości 'p' (lub 1)'x' jest obiektem klasy "dist", ale powinien być macierzą lub ramkąargument 'x' nie jest i nie może być przekształcony na obiekt klasy "dissimilarity"'x' musi być liczbową macierzą n x p'x' musi posiadać tylko kody będące liczbami całkowitymi>>>>> zabawny przypadek w 'clusplot.default()' -- proszę zgłosić raport!Wszystkie zmienne muszą być binarne (czynnik z dwoma poziomami)Odległości muszą być wynikiem 'dist' lub macierzy kwadratowej.Każda z losowych próbek zawiera obiekty pomiędzy którymi żadna odległość nie może być obliczona.Błąd w procedurze Fortran dla elipsoidy obejmującej, problem rang?algorytm FANNY nie uzbieżnił się w 'maxit' = %d iteracjachDla każdej z %d próbek, co najmniej jeden obiekt został znaleziony, który nie mógł być przypisany do grupy (z uwagi na brakujące wartości).Brakujące wartości zostały zastąpione przez medianę odpowiednich zmiennychwartości NA w macierzy różnic nie są dozwolone.wartości NA są niedozwolone w wektorze grupującymwartości NA nie są dozwolone w 'x' typu odległości.wartości NA w macierzy różnic nie są dozwolone.Potrzeba albo różnic 'dist' lub diss.matrix 'dmatrix'Nie wykonano grupowania, wartości NA w macierzy różnicNie wykonano grupowania, wartości NA w macierzy różnic.Nie wykonano grupowania, wyliczono wartości NA w macierzy różnic.Nie wykonano grupowania, znaleziono zmienną z identycznymi niebrakującymi wartościami.Nie wykonano grupowania, wszystkie zmienne mają co najmniej jedną brakującą wartość.Nie wykonano grupowania, znaleziono obiekt któremu brakowało wszystkich wartości.Nie wykonano grupowania, znaleziono obiekt któremu brakowało wszystkich wartości.Brak poprawnej informacji o sylwetce (czy liczba grup =? 
1)Liczba grup 'k' musi zawierać się w zbiorze {1,2, .., n-1}; tak więc n >= 2Obserwacja %s posiada *tylko* wartości NA --> pomijanie jej w grupowaniuObserwacje %s posiadają *tylko* wartości NA --> pomijanie ich w grupowaniuObserwacje %s posiadają *tylko* wartości NA --> pomijanie ich w grupowaniuWektor grupujący posiada niepoprawną długośćLiczba grup powinna wynosić conajmniej 1 oraz co najwyżej n-1.algorytm prawdopodobnie nie uzbieżnił się w %d iteracjachniejednoznaczna metoda grupowaniaprzynajmniej jedna zmienna binarna posiada więcej niż 2 poziomy.przynajmniej jedna zmienna binarna nie posiada 2 różnych poziomów.przynajmniej jedna zmienna binarna posiada wartości poza {0, 1, NA}zmienne binarne %s traktowane jako interwał zostały przeskalowanegrupowane 'x' oraz różnice 'dist' nie są kompatybilneniektóre wyliczone prawdopodobieństwa są ujemne lub wszystkie są zerami'ellipsoidPoints()' nie została jeszcze zaimplementowana dla p >= 3 wymiary.błąd w '.C(cl_pam, *)': niepoprawne 'medID'pełna sylwetka jest dostępna jedynie dla wyników 'clara(*, keep.data = TRUE)'niepoprawne %s; musi być nazwaną listąniepoprawny 'jstop' z '.C(cl_clara,.)':niepoprawny obiekt 'silhouette'niepoprawny obiekt 'twins'niepoprawna metoda grupowanianiepoprawny obiekt podziałuniepoprana struktura 'silhouette'niepoprawny typ %s dla liczb kolumn %spotrzeba co najmniej 2 obiektów do grupowanianie znaleziono różnic ani danych dla funkcji 'clusplot()'nie znaleziono różnic ani danych, ani oryginalnego argumentu %sbrak punktów bez brakujących wartościpomijanie wartości NAjeden lub więcej obiektów zawierają jedynie wartości brakującejeden lub więcej zmiennych zawiera jedynie wartości brakująceustawianie zmiennej 'logical' %s na tym 'asymm'ustawianie zmiennych 'logical' %s na tym 'asymm'ustawianie zmiennych 'logical' %s na tym 'asymm'przynależności są bardzo bliskie 1/k. 
Może zmniejszyć 'memb.exp'?macierz kwadratowa nie jest symetryczna.kiedy 'medoids.x' jest FALSE, 'keep.data' musi być również FALSEz mieszanymi zmiennymi, metryka 'gower' jest używana automatycznieargument 'x' nie jest macierzą danychargument 'x' nie jest ramką danych ani też macierzą liczbowąargument 'x' nie jest ramką liczbową ani też macierząargument 'x' nie jest liczbąargument 'x' musi być macierzą lub ramką danych.cluster/inst/po/en@quot/0000755000175100001440000000000012553131037014757 5ustar hornikuserscluster/inst/po/en@quot/LC_MESSAGES/0000755000175100001440000000000012553131037016544 5ustar hornikuserscluster/inst/po/en@quot/LC_MESSAGES/cluster.mo0000644000175100001440000000227312553131017020564 0ustar hornikusers\ $9(H:I*Y$k9HM)w*C level clara(): random k=%d > n ** C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%dInvalid 'medoids'agnes(method=%d, par.method=*) lead to invalid merge; step %d, D(.,.)=%gclara()'s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave 'toomany_NA'invalid method (code %d)pam(): Bug in C level cstat(), k=%d: ntt=0Project-Id-Version: cluster 2.0.3 Report-Msgid-Bugs-To: POT-Creation-Date: 2015-07-20 09:32+0200 PO-Revision-Date: 2015-07-20 09:32+0200 Last-Translator: Automatically generated Language-Team: none Language: en MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plural-Forms: nplurals=2; plural=(n != 1); C level clara(): random k=%d > n ** C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%dInvalid ‘medoids’agnes(method=%d, par.method=*) lead to invalid merge; step %d, D(.,.)=%gclara()‘s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave ’toomany_NA'invalid method (code %d)pam(): Bug in C level cstat(), k=%d: ntt=0cluster/inst/po/en@quot/LC_MESSAGES/R-cluster.mo0000644000175100001440000002631612553131017020767 0ustar hornikusersX7!Yu'3 2 9= ?w 5 ? $- MR &  )  C! 
@e $ < ; !D f 8 : - 4) V^ D < 7L2.7+f2;6?8BxTGEXK2>v\,;1<n487008a-5)M(v$ %="c&7  /1C\uF#2=:p)'!%8G7@\'t7$6A0Cr9C(4Q]*-"G@D(@?7%w$8:16 4h V D @9!z!L!2G".z"/"2"C #6P#?#B#T $G_$E$K$29%Bl%v%,&&;S&1&&4&8'7K'0'@'-'5#()Y(Q(((()=)X)r))%)")&)7* P* q*/~*1*l*JM+#+:+:+2,)I,'s,,!,FA %JT6@"K '.*  ? M0#RX/()<5S8D-UC1:>Q3HVON4E9!2$;+G=P&,ILWB7%d observation (%s) has *only* NAs --> omit them for clustering!%d observations (%s ...) have *only* NAs --> omit them for clustering!%s has constant columns %s; these are standardized to 0%s has invalid column names%s must be in 1:ncol(x)%s must contain column names or numbers'A' must be p x p cov-matrix defining an ellipsoid'B' has to be a positive integer'col.clus' should have length 4 when color is TRUE'dmatrix' is not a dissimilarity matrix compatible to 'x''iniMem.p' must be a nonnegative n * k matrix with rowSums == 1'k' (number of clusters) must be in {1,2, .., n/2 -1}'m', a membership matrix, must be nonnegative with rowSums == 1'maxit' must be non-negative integer'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d'memb.exp' must be a finite number > 1'n' must be >= 2'par.method' must be of length 1, 3, or 4'samples' should be at least 1'sampsize' = %d should not be larger than the number of objects, %d'sampsize' should be at least %d = max(2, 1+ number of clusters)'weights' must be of length p (or 1)'x' is a "dist" object, but should be a data matrix or frame'x' is not and cannot be converted to class "dissimilarity"'x' must be numeric n x p matrix'x' must only have integer codes>>>>> funny case in clusplot.default() -- please report!All variables must be binary (e.g., factor with 2 levels).Cannot keep data when 'x' is a dissimilarity!Distances must be result of dist or a square matrix.Each of the random samples contains objects between which no distance can be computed.Error in Fortran routine for the spanning ellipsoid, rank problem??FANNY algorithm has not converged in 'maxit' = %d iterationsFor each of 
the %d samples, at least one object was found which could not be assigned to a cluster (because of missing values).Missing values were displaced by the median of the corresponding variable(s)NA values in the dissimilarity matrix not allowed.NA-values are not allowed in clustering vectorNA-values are not allowed in dist-like 'x'.NA-values in the dissimilarity matrix not allowed.Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'No clustering performed, NA's in dissimilarity matrix.No clustering performed, NA-values in the dissimilarity matrix.No clustering performed, NAs in the computed dissimilarity matrix.No clustering performed, a variable was found with all non missing values identical.No clustering performed, all variables have at least one missing value.No clustering performed, an object was found with all values missing.No clustering performed, found variable with more than half values missing.No valid silhouette information (#{clusters} =? 1)Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2Observation %s has *only* NAs --> omit it for clusteringObservations %s have *only* NAs --> omit them for clustering!The clustering vector is of incorrect lengthThe number of cluster should be at least 1 and at most n-1.algorithm possibly not converged in %d iterationsambiguous clustering methodat least one binary variable has more than 2 levels.at least one binary variable has not 2 different levels.at least one binary variable has values not in {0,1,NA}binary variable(s) %s treated as interval scaledclustering 'x' and dissimilarity 'dist' are incompatiblecomputed some negative or all 0 probabilitiesellipsoidPoints() not yet implemented for p >= 3 dim.error from .C(cl_pam, *): invalid medID'sfull silhouette is only available for results of 'clara(*, keep.data = TRUE)'invalid %s; must be named listinvalid 'jstop' from .C(cl_clara,.):invalid 'silhouette' objectinvalid 'twins' objectinvalid clustering methodinvalid partition objectinvalid silhouette 
structureinvalid type %s for column numbers %sneed at least 2 objects to clusterno diss nor data found for clusplot()'no diss nor data found, nor the original argument of %sno points without missing valuesomitting NAsone or more objects contain only missing valuesone or more variables contain only missing valuessetting 'logical' variable %s to type 'asymm'setting 'logical' variables %s to type 'asymm'the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?the square matrix is not symmetric.when 'medoids.x' is FALSE, 'keep.data' must be toowith mixed variables, metric "gower" is used automaticallyx is not a data matrixx is not a dataframe or a numeric matrix.x is not a numeric dataframe or matrix.x is not numericx must be a matrix or data frame.Project-Id-Version: cluster 2.0.3 POT-Creation-Date: 2015-07-20 09:32 PO-Revision-Date: 2015-07-20 09:32 Last-Translator: Automatically generated Language-Team: none MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Language: en Plural-Forms: nplurals=2; plural=(n != 1); %d observation (%s) has *only* NAs --> omit them for clustering!%d observations (%s ...) 
have *only* NAs --> omit them for clustering!%s has constant columns %s; these are standardized to 0%s has invalid column names%s must be in 1:ncol(x)%s must contain column names or numbers‘A’ must be p x p cov-matrix defining an ellipsoid‘B’ has to be a positive integer‘col.clus’ should have length 4 when color is TRUE‘dmatrix’ is not a dissimilarity matrix compatible to ‘x’‘iniMem.p’ must be a nonnegative n * k matrix with rowSums == 1‘k’ (number of clusters) must be in {1,2, .., n/2 -1}‘m’, a membership matrix, must be nonnegative with rowSums == 1‘maxit’ must be non-negative integer‘medoids’ must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d‘memb.exp’ must be a finite number > 1‘n’ must be >= 2‘par.method’ must be of length 1, 3, or 4‘samples’ should be at least 1‘sampsize’ = %d should not be larger than the number of objects, %d‘sampsize’ should be at least %d = max(2, 1+ number of clusters)‘weights’ must be of length p (or 1)‘x’ is a "dist" object, but should be a data matrix or frame‘x’ is not and cannot be converted to class "dissimilarity"‘x’ must be numeric n x p matrix‘x’ must only have integer codes>>>>> funny case in clusplot.default() -- please report!All variables must be binary (e.g., factor with 2 levels).Cannot keep data when ‘x’ is a dissimilarity!Distances must be result of dist or a square matrix.Each of the random samples contains objects between which no distance can be computed.Error in Fortran routine for the spanning ellipsoid, rank problem??FANNY algorithm has not converged in ‘maxit’ = %d iterationsFor each of the %d samples, at least one object was found which could not be assigned to a cluster (because of missing values).Missing values were displaced by the median of the corresponding variable(s)NA values in the dissimilarity matrix not allowed.NA-values are not allowed in clustering vectorNA-values are not allowed in dist-like ‘x’.NA-values in the dissimilarity matrix not allowed.Need either a dissimilarity ‘dist’ or 
diss.matrix ‘dmatrix’No clustering performed, NA's in dissimilarity matrix.No clustering performed, NA-values in the dissimilarity matrix.No clustering performed, NAs in the computed dissimilarity matrix.No clustering performed, a variable was found with all non missing values identical.No clustering performed, all variables have at least one missing value.No clustering performed, an object was found with all values missing.No clustering performed, found variable with more than half values missing.No valid silhouette information (#{clusters} =? 1)Number of clusters ‘k’ must be in {1,2, .., n-1}; hence n >= 2Observation %s has *only* NAs --> omit it for clusteringObservations %s have *only* NAs --> omit them for clustering!The clustering vector is of incorrect lengthThe number of cluster should be at least 1 and at most n-1.algorithm possibly not converged in %d iterationsambiguous clustering methodat least one binary variable has more than 2 levels.at least one binary variable has not 2 different levels.at least one binary variable has values not in {0,1,NA}binary variable(s) %s treated as interval scaledclustering ‘x’ and dissimilarity ‘dist’ are incompatiblecomputed some negative or all 0 probabilitiesellipsoidPoints() not yet implemented for p >= 3 dim.error from .C(cl_pam, *): invalid medID'sfull silhouette is only available for results of ‘clara(*, keep.data = TRUE)’invalid %s; must be named listinvalid ‘jstop’ from .C(cl_clara,.):invalid ‘silhouette’ objectinvalid ‘twins’ objectinvalid clustering methodinvalid partition objectinvalid silhouette structureinvalid type %s for column numbers %sneed at least 2 objects to clusterno diss nor data found for clusplot()'no diss nor data found, nor the original argument of %sno points without missing valuesomitting NAsone or more objects contain only missing valuesone or more variables contain only missing valuessetting ‘logical’ variable %s to type ‘asymm’setting ‘logical’ variables %s to type ‘asymm’the memberships 
are all very close to 1/k. Maybe decrease ‘memb.exp’ ?the square matrix is not symmetric.when ‘medoids.x’ is FALSE, ‘keep.data’ must be toowith mixed variables, metric "gower" is used automaticallyx is not a data matrixx is not a dataframe or a numeric matrix.x is not a numeric dataframe or matrix.x is not numericx must be a matrix or data frame.cluster/inst/po/ko/0000755000175100001440000000000012553131037013755 5ustar hornikuserscluster/inst/po/ko/LC_MESSAGES/0000755000175100001440000000000012553131037015542 5ustar hornikuserscluster/inst/po/ko/LC_MESSAGES/cluster.mo0000644000175100001440000000203412466225010017555 0ustar hornikusersL |$9IR*k$9;Iu1*C level clara(): random k=%d > n ** C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%dclara()'s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave 'toomany_NA'invalid method (code %d)pam(): Bug in C level cstat(), k=%d: ntt=0Project-Id-Version: cluster 1.15.2 Report-Msgid-Bugs-To: POT-Creation-Date: 2015-02-02 14:29+0100 PO-Revision-Date: 2015-02-06 21:56-0600 Last-Translator:Chel Hee Lee Language-Team: Chel Hee Lee Language: ko MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plural-Forms: nplurals=1; plural=0; C level clara(): random k=%d > n ** C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%dclara()'s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave 'toomany_NA'메소드가 올바르지 않습니다 (code %d).pam(): Bug in C level cstat(), k=%d: ntt=0cluster/inst/po/ko/LC_MESSAGES/R-cluster.mo0000644000175100001440000003374612466225010017772 0ustar hornikusersW7Ie'}3 29- ?g 5 ? 
$ MB &  )  C @U $ < ; !4 V 8w : 4 V Dw <  Ly2.+(2T;6?B:T}GEK`2>v,;104L8708#-\5)M8$W|%"%&H7o  /1\7F#2:2m)'!c umw;[4Cu2MyTsh=P7F& 4m F N A8!kz!r!HY"G"w"jb#d#q2$$Y@%%K:&O&I&% 'UF'W'v'k((wq))`k*m*>:+iy+j+GN,_,Q,6H-z-}-jx.[.2?/Ir/P/\ 0ej00\R111)1G 2.S2/2D2C2D;3j3J364]M4]4B 5yL555Q5hN636J6A67(x7D7E@$IS5?L!J &-)  > /"QW.'(;4R7C,TB09=P2GUNM3D8 1#:*F<O%+HKVA6%d observation (%s) has *only* NAs --> omit them for clustering!%d observations (%s ...) have *only* NAs --> omit them for clustering!%s has constant columns %s; these are standardized to 0%s has invalid column names%s must be in 1:ncol(x)%s must contain column names or numbers'A' must be p x p cov-matrix defining an ellipsoid'B' has to be a positive integer'col.clus' should have length 4 when color is TRUE'dmatrix' is not a dissimilarity matrix compatible to 'x''iniMem.p' must be a nonnegative n * k matrix with rowSums == 1'k' (number of clusters) must be in {1,2, .., n/2 -1}'m', a membership matrix, must be nonnegative with rowSums == 1'maxit' must be non-negative integer'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d'memb.exp' must be a finite number > 1'n' must be >= 2'par.method' must be of length 1, 3, or 4'samples' should be at least 1'sampsize' = %d should not be larger than the number of objects, %d'sampsize' should be at least %d = max(2, 1+ number of clusters)'weights' must be of length p (or 1)'x' is a "dist" object, but should be a data matrix or frame'x' is not and cannot be converted to class "dissimilarity"'x' must be numeric n x p matrix'x' must only have integer codes>>>>> funny case in clusplot.default() -- please report!All variables must be binary (e.g., factor with 2 levels).Distances must be result of dist or a square matrix.Each of the random samples contains objects between which no distance can be computed.Error in Fortran routine for the spanning ellipsoid, rank problem??FANNY algorithm has not converged in 'maxit' = %d iterationsFor each of the %d samples, at 
least one object was found which could not be assigned to a cluster (because of missing values).Missing values were displaced by the median of the corresponding variable(s)NA values in the dissimilarity matrix not allowed.NA-values are not allowed in clustering vectorNA-values are not allowed in dist-like 'x'.NA-values in the dissimilarity matrix not allowed.Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'No clustering performed, NA's in dissimilarity matrix.No clustering performed, NA-values in the dissimilarity matrix.No clustering performed, NAs in the computed dissimilarity matrix.No clustering performed, a variable was found with all non missing values identical.No clustering performed, all variables have at least one missing value.No clustering performed, an object was found with all values missing.No clustering performed, found variable with more than half values missing.No valid silhouette information (#{clusters} =? 1)Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2Observation %s has *only* NAs --> omit it for clusteringObservations %s have *only* NAs --> omit them for clustering!The clustering vector is of incorrect lengthThe number of cluster should be at least 1 and at most n-1.algorithm possibly not converged in %d iterationsambiguous clustering methodat least one binary variable has more than 2 levels.at least one binary variable has not 2 different levels.at least one binary variable has values not in {0,1,NA}binary variable(s) %s treated as interval scaledclustering 'x' and dissimilarity 'dist' are incompatiblecomputed some negative or all 0 probabilitiesellipsoidPoints() not yet implemented for p >= 3 dim.error from .C(cl_pam, *): invalid medID'sfull silhouette is only available for results of 'clara(*, keep.data = TRUE)'invalid %s; must be named listinvalid 'jstop' from .C(cl_clara,.):invalid 'silhouette' objectinvalid 'twins' objectinvalid clustering methodinvalid partition objectinvalid silhouette structureinvalid type 
%s for column numbers %sneed at least 2 objects to clusterno diss nor data found for clusplot()'no diss nor data found, nor the original argument of %sno points without missing valuesomitting NAsone or more objects contain only missing valuesone or more variables contain only missing valuessetting 'logical' variable %s to type 'asymm'setting 'logical' variables %s to type 'asymm'the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?the square matrix is not symmetric.when 'medoids.x' is FALSE, 'keep.data' must be toowith mixed variables, metric "gower" is used automaticallyx is not a data matrixx is not a dataframe or a numeric matrix.x is not a numeric dataframe or matrix.x is not numericx must be a matrix or data frame.Project-Id-Version: cluster 1.15.2 POT-Creation-Date: 2015-01-30 12:14 PO-Revision-Date: 2015-02-06 21:56-0600 Last-Translator:Chel Hee Lee Language-Team: Chel Hee Lee Language: ko MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plural-Forms: nplurals=1; plural=0; %d개의 관측값들이 (%s) *오로지* NA만을 가집니다 --> 군집화를 위하여 이들을 제거합니다!%1$s는 상수(constant)값을 가지는 열 %2$s를 가집니다. 이들은 0으로 표준화(standardized)됩니다.%s는 올바른 열이름을 가지고 있지 않습니다.%s는 반드시 1:ncol(x)내에 있어야 합니다.%s는 반드시 열 이름 또는 번호를 포함해야 합니다.'A'는 반드시 타원(ellipsoid)를 정의하는 크기가 p x p인 공분산행렬(cov-matrix)이어야 합니다.'B'는 반드시 양의 정수이어야 합니다.color가 TRUE일 때, 'col.clus'의 길이는 반드시 4이어야 합니다.'dmatrix'는 'x'에 부합하는 (또는 사용할 수 있는) 비유사성 행렬(dissimilarity matrix)이 아닙니다.'iniMem.p'는 반드시 크기가 n * k 인 비음수 행렬(nonnegative matrix)이어야 하며, 이 행렬의 rowSums == 1 이어야 합니다.'k' (군집의 개수)는 반드시 {1,2, .., n/2 -1} 내에 존재해야 합니다.멤버쉽 행렬(membership matrix) 'm'은 반드시 음수를 가지지 않으며 rowSums == 1이어야 합니다.'maxit'은 반드시 음이 아닌 정수이어야 합니다.'medoids'는 반드시 NULL 또는 {1,2, .., n}으로부터 %1$d개의 구분되는 인덱스를 가진 벡터입니다 (n=%2$d). 
'memb.exp'는 반드시 1보다 큰 유한한(finite) 숫자이어야 합니다.'n'는 반드시 2보다 크거나 같아야 합니다.'par.method'의 길이는 반드시 1, 3, 또는 4이어야 합니다.'samples'는 적어도 1 이상 이어야 합니다.'sampsize' = %1$d는 객체의 개수 %2$d보다 클 수 없습니다.'sampsize'는 최소 %d = max(2, 1 + 군집의 개수)가 되어야 합니다.'weights'의 길이는 반드시 p (또는 1)이어야 합니다.'x'는 클래스 "dist"를 가지는 객체이지만, 데이터 행렬 또는 프레임이어야 합니다.'x'는 "dissimilarity"이라는 클래스가 아니거나 클래스 "dissimilarity"로 전환할 수 없습니다.'x'는 반드시 크기가 n x p인 수치형 행렬이어야 합니다.'x'는 오로지 정수형 코드(codes)만을 가질 수 있습니다.>>>>> clusplot.default()에서 예상치 못한 경우가 발생했습니다 -- 보고해 주시길 부탁드립니다!모든 변수들은 반드시 2개의 수준(levels)으로 이루어진 요인(factor)이어야 합니다.거리(distances)는 반드시 dist 또는 정방행렬(square matrix)의 결과이어야 합니다.각각의 무작위 표본은 서로간의 거리를 계산할 수 없는 객체들을 포함하고 있습니다.스패닝 타원(spanning ellipsoid)를 생성하는 포트란 루틴(Fortran routine)에서 에러가 발생했습니다. 위수(rank) 문제인가요??FANNY 알고리즘은 'maxit' = %d번의 반복수행에도 수렴하지 않았습니다.%d개의 표본 각각에 대해서 결측값으로 인하여 어느 군집에도 배정할 수 없는 객체를 적어도 하나 이상 발견하였습니다.결측값들은 대응변수(들)의 중앙값으로 대체되었습니다.비유사성 행렬(dissimilarity matrix)는 NA 값을 가질 수 없습니다.군집벡터(clustering vector)에서는 NA가 허용되지 않습니다.'x'는 NA를 가질 수 없습니다.NA의 값은 비유사성 행렬(dissimilarity matrix)에 사용될 수 없습니다.dissimilarity 'dist' 또는 diss.matrix 'dmatrix' 둘 중에 하나가 필요합니다.비유사성 행렬(dissimilarity matrix)에 NA가 있기 때문에, 군집화 과정이 실행되지 않았습니다.비유사성 행렬(dissimilarity matrix)에서 NA 값이 발견되었기 때문에 군집화 과정이 실행되지 않았습니다.계산된 비유사성 행렬(dissimilarity matrix) 내에 NA가 존재하여 군집화 과정이 수행되지 않았습니다.결측되지 않은 모든 값들이 동일한 변수가 발견되어 군집화 과정이 수행되지 않았습니다.모든 변수들이 적어도 하나 이상의 결측값을 가지기 때문에 군집화 과정이 수행되지 않았습니다.모든 값이 결측된 객체가 발견되어 군집화 과정이 수행되지 않았습니다.절반 이상의 값들이 결측된 변수가 발견되어 군집화 과정이 수행되지 않았습니다.유효한 silhouette 정보가 없습니다 (#{clusters} =? 
1)군집(clusters)의 개수 'k'는 반드시 {1,2, .., n-1}내에 존재해야 하므로 n >= 2 입니다.관측값 %s는 *오로지* NA만을 가집니다 --> 군집화를 위하여 이것들을 제거합니다!군집벡터(clustering vector)의 길이가 올바르지 않습니다.군집(cluster)의 개수는 적어도 1 이상이며 최대 n-1 이내에 있어야 합니다.알고리즘의 %d번의 반복수행에도 수렴하지 않을 수 있습니다.불분명한 군집방법(clustering method)입니다.적어도 하나 이상의 이항변수(binary variable)가 두 가지 이상의 수준(levels)을 가지고 있습니다.적어도 하나 이상의 이항변수(binary variable)이 서로 다른 두 가지 수준을 가지고 있지 않습니다.적어도 하나 이상의 이항변수(binary variable)이 {0,1,NA} 외의 값을 가지고 있습니다.이항변수(binary variable) %s는 구간척도(interval scale)로서 다루어집니다. 'x'와 'dist'가 서로 부합하지 않습니다.확률값이 모두 0이거나 일부가 음수로 산출되었습니다.ellipsoidPoints()는 p >= 3 인경우에는 아직 구현되지 않았습니다..C(cl_pam, *)으로부터 에러가 발생했습니다: medID가 올바르지 않습니다.full silhouette는 'clara(*, keep.data = TRUE)'의 결과만에 오로지 사용할 수 있습니다.사용할 수 있는 %s가 아닙니다. 반드시 구성요소에 이름이 부여된 리스트(named list)이여야 합니다..C(cl_clara,.)으로부터 얻어진 'jstop'는 다음과 같은 이유로 이상합니다: 'silhouette' 객체가 올바르지 않습니다.올바른 'twins' 객체가 아닙니다.군집방법(clustering method)의 이름이 올바르지 않습니다.partition 객체가 유효하지 않습니다.silhouette 구조가 올바르지 않습니다.행번호 %2$s에 잘못된 유형(type) %1$s이 주어졌습니다.군집(cluster)는 적어도 2개의 객체를 필요로 합니다.clusplot()에 사용될 diss와 data 모두 찾을 수 없습니다.diss와 data 모두 찾을 수 없을 뿐만 아니라 원래의 인자 %s 또한 찾을 수 없습니다.결측값들을 제외하면 사용가능한 포인트들이 없습니다.NA를 삭제합니다.하나 또는 그 이상의 객체들이 오로지 결측값만을 포함하고 있습니다.하나 또는 그 이상의 변수들이 오로지 결측값만을 포함하고 있습니다.'logical' 변수 %s를 유형(type) 'asymm'으로 설정합니다.멤버쉽(membership) 전부가 1/k에 매우 가깝습니다. 아마도 'memb.exp'를 줄여보는 것은 어떨까요?대칭(symmetric)적인 정방행렬이 아닙니다.'medoids.x'가 FALSE인 경우에는 'keep.data' 역시 FALSE이어야 합니다.혼합형 변수(mixed variables)를 이용할 때는 metric은 자동으로 "gower"가 사용됩니다.x는 데이터 행렬(data matrix)이 아닙니다.x는 데이터 프레임이 아니거나 수치형 행렬이 아닙니다.x는 수치형 데이터 프레임 또는 행렬이 아닙니다.x는 수치형(numeric)이 아닙니다.x는 반드시 행렬 또는 데이터 프레임이어야 합니다.cluster/inst/po/fr/0000755000175100001440000000000012553131037013753 5ustar hornikuserscluster/inst/po/fr/LC_MESSAGES/0000755000175100001440000000000012553131037015540 5ustar hornikuserscluster/inst/po/fr/LC_MESSAGES/R-cluster.mo0000644000175100001440000003031612461440665017770 0ustar hornikusersW7Ie'}3 29- ?g 5 ? 
$ MB &  )  C @U $ < ; !4 V 8w : 4 V Dw <  Ly2.+(2T;6?B:T}GEK`2>v,;104L8708#-\5)M8$W|%"%&H7o  /1\7F#2:2m)'!k uA $M$r4I >7EvG4K9)c(<-P ~CB)&KPQ+& VA ` I lC!]!@"O"c"R9#O#H#V%$Y|$K$X"%Y{%Z%`0&R&e&<J'E''3^(7(B( );-)4i)?)N)K-*Ey*3*/*`#+++,+++!,"4,"W,3z,E,5,I*-#t--@-D-v4.U.)/=+/Ri/#/J/J+0v0@0E@$IS5?L!J &-)  > /"QW.'(;4R7C,TB09=P2GUNM3D8 1#:*F<O%+HKVA6%d observation (%s) has *only* NAs --> omit them for clustering!%d observations (%s ...) have *only* NAs --> omit them for clustering!%s has constant columns %s; these are standardized to 0%s has invalid column names%s must be in 1:ncol(x)%s must contain column names or numbers'A' must be p x p cov-matrix defining an ellipsoid'B' has to be a positive integer'col.clus' should have length 4 when color is TRUE'dmatrix' is not a dissimilarity matrix compatible to 'x''iniMem.p' must be a nonnegative n * k matrix with rowSums == 1'k' (number of clusters) must be in {1,2, .., n/2 -1}'m', a membership matrix, must be nonnegative with rowSums == 1'maxit' must be non-negative integer'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d'memb.exp' must be a finite number > 1'n' must be >= 2'par.method' must be of length 1, 3, or 4'samples' should be at least 1'sampsize' = %d should not be larger than the number of objects, %d'sampsize' should be at least %d = max(2, 1+ number of clusters)'weights' must be of length p (or 1)'x' is a "dist" object, but should be a data matrix or frame'x' is not and cannot be converted to class "dissimilarity"'x' must be numeric n x p matrix'x' must only have integer codes>>>>> funny case in clusplot.default() -- please report!All variables must be binary (e.g., factor with 2 levels).Distances must be result of dist or a square matrix.Each of the random samples contains objects between which no distance can be computed.Error in Fortran routine for the spanning ellipsoid, rank problem??FANNY algorithm has not converged in 'maxit' = %d iterationsFor each of the %d samples, at least one 
object was found which could not be assigned to a cluster (because of missing values).Missing values were displaced by the median of the corresponding variable(s)NA values in the dissimilarity matrix not allowed.NA-values are not allowed in clustering vectorNA-values are not allowed in dist-like 'x'.NA-values in the dissimilarity matrix not allowed.Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'No clustering performed, NA's in dissimilarity matrix.No clustering performed, NA-values in the dissimilarity matrix.No clustering performed, NAs in the computed dissimilarity matrix.No clustering performed, a variable was found with all non missing values identical.No clustering performed, all variables have at least one missing value.No clustering performed, an object was found with all values missing.No clustering performed, found variable with more than half values missing.No valid silhouette information (#{clusters} =? 1)Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2Observation %s has *only* NAs --> omit it for clusteringObservations %s have *only* NAs --> omit them for clustering!The clustering vector is of incorrect lengthThe number of cluster should be at least 1 and at most n-1.algorithm possibly not converged in %d iterationsambiguous clustering methodat least one binary variable has more than 2 levels.at least one binary variable has not 2 different levels.at least one binary variable has values not in {0,1,NA}binary variable(s) %s treated as interval scaledclustering 'x' and dissimilarity 'dist' are incompatiblecomputed some negative or all 0 probabilitiesellipsoidPoints() not yet implemented for p >= 3 dim.error from .C(cl_pam, *): invalid medID'sfull silhouette is only available for results of 'clara(*, keep.data = TRUE)'invalid %s; must be named listinvalid 'jstop' from .C(cl_clara,.):invalid 'silhouette' objectinvalid 'twins' objectinvalid clustering methodinvalid partition objectinvalid silhouette structureinvalid type %s for 
column numbers %sneed at least 2 objects to clusterno diss nor data found for clusplot()'no diss nor data found, nor the original argument of %sno points without missing valuesomitting NAsone or more objects contain only missing valuesone or more variables contain only missing valuessetting 'logical' variable %s to type 'asymm'setting 'logical' variables %s to type 'asymm'the memberships are all very close to 1/k. Maybe decrease 'memb.exp' ?the square matrix is not symmetric.when 'medoids.x' is FALSE, 'keep.data' must be toowith mixed variables, metric "gower" is used automaticallyx is not a data matrixx is not a dataframe or a numeric matrix.x is not a numeric dataframe or matrix.x is not numericx must be a matrix or data frame.Project-Id-Version: cluster 1.14.5 POT-Creation-Date: 2015-01-26 14:31 PO-Revision-Date: 2014-03-30 09:03+0100 Last-Translator: Philippe Grosjean Language-Team: none Language: fr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-Generator: Poedit 1.6.4 Plural-Forms: nplurals=2; plural=(n > 1); %d observation (%s) n'a *que* des NAs --> ignorée pour le regroupement!%d observations (%s) n'ont *que* des NAs --> ignorées pour le regroupement!%s à des colonnes constantes %s ; elles sont standardisées à 0%s a des noms de colonnes incorrects%s doit être compris dans 1:ncol(x)%s doit contenir des noms de colonnes ou des nombres'A doit être une matrice de covariance p x p définissant un ellipsoïde'B' doit être un entier positif'col.clus' doit avoir une longueur de 4 lorsque color est TRUE'dmatrix' n'est pas une matrice de dissimilarité compatible avec 'x''iniMem.p' doit être une matrice n * k non négative avec rowSums == 1'k' (nombre de groupes) doit être {1,2,…, n/2 -1}'m', une matrice d'appartenance, doit être non négative avec rowSums == 1'maxit' doit être un entier non négatif'medoids' doit être NULL ou un vecteur de %d valeurs d'indice distinctes dans {1, 2, …, n}, n=%d'memb.exp' doit être un nombre fini 
> 1'n" doit être >= 2'par.method' doit être de longueur 1, 3 ou 4'samples' doit valoir au moins 1'sampsize' = %d ne peut être plus grand que le nombre d'objets, %d'sampsize' doit être au minimum %d = max(2, 1+ nombre de groupes)'weights' doit être de longueur p (ou 1)'x' est un objet "dist", mais il faut une matrice ou un tableau de données'x' n'est pas et ne peux pas être converti en un objet de classe "dissimilarity"'x' doit être une matrice numérique n x p'x' doit n'avoir que des codes entiers>>>>> cas pathologique dans clusplot.default() -- veuillez envoyer un rapport de bug !Toutes les variables doivent être binaires (c'est-à-dire, des variables facteur à 2 niveaux).Les distances doivent résulter d'un objet dist ou d'une matrice carrée.Chacun des échantillons aléatoires contient des objets entre lesquels la distance ne peut être calculée.Erreur dans la routine Fortran pour obtenir l'ellipsoïde de dispersion, problème de rang??l'algorithme FANNY n'a pas convergé en 'maxit' = %d itérationsDans chacun des %d échantillons, au moins un objet ne peut être assigné à un groupe (parce qu'il contient des valeurs manquantes)Les valeurs manquantes ont été remplacées par la médiane de la ou des variables correspondantesles valeurs manquantes (NA) ne sont pas admises dans la matrice de dissimilarité.Les valeurs manquantes NA ne sont pas autorisées dans le vecteur d'agrégationdes valeurs manquantes NA ne sont pas autorisées dans 'x' de type dist.les valeurs manquantes (NA) ne sont pas autorisées dans la matrice de dissimilarité.Il faut soit un objet 'dist' de dissimilarité ou une matrice de dissimilarité 'dmatrix'Aucune agrégation n'est réalisée, NAs dans la matrice de dissimilarité.Aucune agrégation n'est réalisée, présence de NAs dans la matrice de dissimilarité.Aucune agrégation n'a été effectuée, NAs dans la matrice de dissimilarité calculée.Aucune agrégation n'a été effectuée, une variable a toutes ses valeurs non manquantes.Aucune agrégation n'a été effectuée, toutes 
les variables ont au moins une valeur manquante.Aucune agrégation n'a été effectuée, un objet a toutes ses valeurs manquantes.Aucune agrégation n'a été effectuée, une variable a plus de la moitié de ses valeurs manquantes.Aucune valeur de silhouette n'est correcte (#{groupes} =? 1)Le nombre de groupes 'k' doit être dans {1,2, …, n-1} ; où n >= 2L'observation %s n'a *que* des NAs --> ignorée pour le regroupementLes observations %s n'ont *que* des NAs --> ignorées pour le regroupement!Le vecteur d'agrégation est de longueur incorrecteLe nombre de groupes doit être compris entre 1 et n-1.l'algorithme n'a vraisemblablement pas convergé en %d itérationsméthode d'agrégation ambigüeau moins une des variables binaires a plus de deux niveaux.au moins une variable binaire n'a pas deux nivea ux.au moins une variable binaire a des valeurs autres que {0,1,NA}la ou les variables binaires %s sont traitées comme intervalles standardisésl'agrégation 'x' et la matrice de dissimilarité 'dist' sont incompatiblesdes probabilités négatives ou égales à zéro ont été calculéesellipsoidPoints() non implémenté pour p >= 3 dim.erreur depuis .C(cl_pam, *) : medIDs incorrectsla silhouette complète n'est disponible que pour les résultats de 'clara(*, keep.data = TRUE)'%s incorrect ; doit être une liste nommée'jstop' incorrect obtenu de .C(cl_clara,.) 
:objet 'silhouette' incorrectobjet 'twins' incorrectméthode d'agrégation incorrecteobjet de partitionnement incorrectstructure de silhouette incorrectetype inadéquat %s pour les numéros de colonnes %sau moins deux objets sont nécessaires pour effectuer une agrégationpas de diss ou de données trouvées pour clusplot()'pas de diss ou de données trouvées, même pas l'argument original de %saucun point sans valeurs manquantesvaleurs NAs ignoréesun ou plusieurs objets ne contiennent que des valeurs manquantesune ou plusieurs variables ne contiennent que des valeurs manquantesla variable 'logical' %s est transformée en type 'asymm'les variable 'logical' %s sont transformées en type 'asymm'les appartenances sont toutes très proches de 1/k. Essayez en diminuant 'memb.exp' ?la matrice carrée n'est pas symétrique.lorsque 'medoids.x' est FALSE, 'keep.data' doit l'être aussiavec des variables mélangées, la métrique "gower" est utilisée automatiquementx n'est pas une matrice de donnéesx n'est pas un tableau de données (data frame) ou une matrice numérique.x n'est pas un tableau de données (data frame) ou une matrice numérique.x n'est pas numériquex doit être une matrice ou un tableau de données (data frame).cluster/inst/po/de/0000755000175100001440000000000012553131037013734 5ustar hornikuserscluster/inst/po/de/LC_MESSAGES/0000755000175100001440000000000012553131037015521 5ustar hornikuserscluster/inst/po/de/LC_MESSAGES/cluster.mo0000644000175100001440000000240412466225010017535 0ustar hornikusers\ $9(H:I*$A]Jo*C level clara(): random k=%d > n ** C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%dInvalid 'medoids'agnes(method=%d, par.method=*) lead to invalid merge; step %d, D(.,.)=%gclara()'s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave 'toomany_NA'invalid method (code %d)pam(): Bug in C level cstat(), k=%d: ntt=0Project-Id-Version: cluster 2.0.1 Report-Msgid-Bugs-To: POT-Creation-Date: 2015-02-02 14:29+0100 PO-Revision-Date: 2015-02-02 12:30+0100 Last-Translator: 
Detlef Steuer Language-Team: R Core Language: DE MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Plural-Forms: nplurals=2; plural=n == 1 ? 0 : 1; C Level clara(): random k=%d > n ** C Level dysta2(): nsel[%s= %d] = %d ist außerhalb von 0..n, n=%dunzulässige 'medoids'agnes(method=%d, par.method=*) führte zu unzulässigem Zusammenfassen; Schritt %d, D(.,.)=%gclara()'s C Level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) ergab 'toomany_NA'unzulässige Methode (Kode %d)pam(): Bug in C Level cstat(), k=%d: ntt=0cluster/inst/po/de/LC_MESSAGES/R-cluster.mo0000644000175100001440000002733112553131017017742 0ustar hornikusersX7!Yu'3 2 9= ?w 5 ? $- MR &  )  C! @e $ < ; !D f 8 : - 4) V^ D < 7L2.7+f2;6?8BxTGEXK2>v\,;1<n487008a-5)M(v$ %="c&7  /1C\uF#2=:p)'!%G>\ *G&JAq<K6<Os%]&Gn*"G@)XCN' (= Mf > L K@!_!G!C4"x"Q#/V#"#)#1#J$=P$A$M$e%U%Q%],&8&F&} ')'@'3''(7B(Bz(?(1(A/)=q)@).)T*.t*)*!** +(+G+)g+++/+M+ ;,\,7l,8,X,J6-.->-9-).,A.-n..&.FA %JT6@"K '.*  ? M0#RX/()<5S8D-UC1:>Q3HVON4E9!2$;+G=P&,ILWB7%d observation (%s) has *only* NAs --> omit them for clustering!%d observations (%s ...) 
have *only* NAs --> omit them for clustering!%s has constant columns %s; these are standardized to 0%s has invalid column names%s must be in 1:ncol(x)%s must contain column names or numbers'A' must be p x p cov-matrix defining an ellipsoid'B' has to be a positive integer'col.clus' should have length 4 when color is TRUE'dmatrix' is not a dissimilarity matrix compatible to 'x''iniMem.p' must be a nonnegative n * k matrix with rowSums == 1'k' (number of clusters) must be in {1,2, .., n/2 -1}'m', a membership matrix, must be nonnegative with rowSums == 1'maxit' must be non-negative integer'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d'memb.exp' must be a finite number > 1'n' must be >= 2'par.method' must be of length 1, 3, or 4'samples' should be at least 1'sampsize' = %d should not be larger than the number of objects, %d'sampsize' should be at least %d = max(2, 1+ number of clusters)'weights' must be of length p (or 1)'x' is a "dist" object, but should be a data matrix or frame'x' is not and cannot be converted to class "dissimilarity"'x' must be numeric n x p matrix'x' must only have integer codes>>>>> funny case in clusplot.default() -- please report!All variables must be binary (e.g., factor with 2 levels).Cannot keep data when 'x' is a dissimilarity!Distances must be result of dist or a square matrix.Each of the random samples contains objects between which no distance can be computed.Error in Fortran routine for the spanning ellipsoid, rank problem??FANNY algorithm has not converged in 'maxit' = %d iterationsFor each of the %d samples, at least one object was found which could not be assigned to a cluster (because of missing values).Missing values were displaced by the median of the corresponding variable(s)NA values in the dissimilarity matrix not allowed.NA-values are not allowed in clustering vectorNA-values are not allowed in dist-like 'x'.NA-values in the dissimilarity matrix not allowed.Need either a dissimilarity 'dist' or 
diss.matrix 'dmatrix'No clustering performed, NA's in dissimilarity matrix.No clustering performed, NA-values in the dissimilarity matrix.No clustering performed, NAs in the computed dissimilarity matrix.No clustering performed, a variable was found with all non missing values identical.No clustering performed, all variables have at least one missing value.No clustering performed, an object was found with all values missing.No clustering performed, found variable with more than half values missing.No valid silhouette information (#{clusters} =? 1)Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2Observation %s has *only* NAs --> omit it for clusteringObservations %s have *only* NAs --> omit them for clustering!The clustering vector is of incorrect lengthThe number of cluster should be at least 1 and at most n-1.algorithm possibly not converged in %d iterationsambiguous clustering methodat least one binary variable has more than 2 levels.at least one binary variable has not 2 different levels.at least one binary variable has values not in {0,1,NA}binary variable(s) %s treated as interval scaledclustering 'x' and dissimilarity 'dist' are incompatiblecomputed some negative or all 0 probabilitiesellipsoidPoints() not yet implemented for p >= 3 dim.error from .C(cl_pam, *): invalid medID'sfull silhouette is only available for results of 'clara(*, keep.data = TRUE)'invalid %s; must be named listinvalid 'jstop' from .C(cl_clara,.):invalid 'silhouette' objectinvalid 'twins' objectinvalid clustering methodinvalid partition objectinvalid silhouette structureinvalid type %s for column numbers %sneed at least 2 objects to clusterno diss nor data found for clusplot()'no diss nor data found, nor the original argument of %sno points without missing valuesomitting NAsone or more objects contain only missing valuesone or more variables contain only missing valuessetting 'logical' variable %s to type 'asymm'setting 'logical' variables %s to type 'asymm'the memberships 
are all very close to 1/k. Maybe decrease 'memb.exp' ?the square matrix is not symmetric.when 'medoids.x' is FALSE, 'keep.data' must be toowith mixed variables, metric "gower" is used automaticallyx is not a data matrixx is not a dataframe or a numeric matrix.x is not a numeric dataframe or matrix.x is not numericx must be a matrix or data frame.Project-Id-Version: R 3.2.1 Report-Msgid-Bugs-To: bugs.r-project.org POT-Creation-Date: 2015-01-26 14:31 PO-Revision-Date: 2014-03-28 10:16+0100 Last-Translator: Detlef Steuer Language-Team: R Core Team ausgelassen für Clustering%d Beobachtungen (%s) haben *nur* NAs --> ausgelassen für Clustering%s hat konstante Spalten %s; diese werden standardisiert auf 0%s hat unzulässige Spaltennamen%s muss aus 1:ncol(x) sein%s muss Spaltennamen oder Zahlen enthalten'A' muss eine p x p Kovarianzmatrix sein, die einen Ellipsoid definiert'B' muss eine positive ganze Zahl sein'col.clus' sollte Länge 4 haben, wenn color auf TRUE gesetzt ist'dmatrix' ist keine zu 'x' kompatible Unähnlichkeitsmatrix 'iniMem.p' muss eine nicht-negative n x k Matrix mit Zeilensummen == 1 sein'k' (Anzahl Cluster) muss aus {1, 2, ..., n/2 -1} sein'm' ist eine Mitgliedswertmatrix, muss nicht-negativ sein mit Zeilensummen == 1'maxit' muss nicht-negative Zahl sein'medoids' muss NULL oder ein Vektor von %d verschiedenen Indizes aus {1, 2,..., n}, n=%d sein'memb.exp' muss endliche Zahl > 1 sein'n' muss >= 2 sein'par.method' muss Länge 1, 3 oder 4 haben'samples' sollte mindestens 1 sein'sampsize' = %d; sollte nicht größer sein als die Zahl der Objekte %d'sampsize' sollte mindestens %d = max(2, 1+ Anzahl Cluster sein)'weights' muss von Länge p (oder 1) sein'x' ist ein "dist"-Objekt, sollte aber Datenmatrix oder -frame sein'x' ist nicht und kann auch nicht umgewandelt werden in Klasse "dissimilarity"'x' muss numerische n x p - Matrix sein'x' darf nur ganzzahlige Kodes enthalten>>>>> komische Sache in clusplot.default() -- bitte an den Entwickler senden!Alle Variablen 
müssen binär sein (z.B. Faktor mit 2 Stufen).Kann Datenmatrix 'data' nicht beibehalten wenn 'x' eine 'dissimilarity' ist!Distanzen müssen ein Ergebnis von dist oder eine quadratische Matrix sein.Jede der Zufallsstichproben enthält Objekte, zwischen denen kein Abstand berechnet werden kannFehler im Fortran-Kode für den aufspannenden Ellipsoiden, Rangproblem?FANNY Algorithmus ist in 'maxit' = %d Iterationen nicht konvergiertFür jede der %d Stichproben wurde mindestens ein Objekt gefunden, das nicht einem Cluster zugeordnet werden konnte (wegen fehlender Werte)Fehlende Werte wurden durch den Median der korrespondierenden Variable(n) ersetztNAs in der Unähnlichkeitsmatrix nicht erlaubt.NAs im Clustervektor nicht erlaubtNAs nicht erlaubt in dist-ähnlichem 'x'.NAs in der Unähnlichkeitsmatrix nicht zulässig.Benötige entweder Unähnlichkeitsmatrix 'dist' oder diss.matrix 'dmatrix'Keine Clusterung durchgeführt, NAs in Unähnlichkeitsmatrix.Keine Clusterung durchgeführt. NAs in der Unähnlichkeitsmatrix.Keine Clusterung durchgeführt, NAs in der berechneten Unähnlichkeitsmatrix.Keine Clusterung durchgeführt. Variable gefunden, bei der alle nicht fehlenden Werte identisch sind.Keine Clusterung durchgeführt. Alle Variablen haben mindestens einen fehlenden Wert.Keine Clusterung durchgeführt. Objekt gefunden, bei dem alle Werte fehlend sind.Keine Clusterung durchgeführt. Variable gefunden, mit mehr als der Hälfte fehlenden Werten.keine gültige Silhouetteninformation (#{clusters} =? 
1)Anzahl der Cluster 'k' muss auch {1, 2, ..., n-1} sein; deshalb n >= 2Beobachtung %s hat *nur* NAs --> ausgelassen für ClusteringBeobachtungen %s haben *nur* NAs --> ausgelassen für ClusteringDer Clustervektor hat eine falsche LängeDie Anzahl der Cluster sollte mindestens 1, höchstens n-1 sein.Algorithmus hat nicht in %d Iterationen konvergiertzweideutige Clustermethodemindestens eine binäre Variable hat mehr als 2 Stufen.mindestens eine binäre Variable hat keine 2 verschiedenen Stufen.mindestens eine binäre Variable hat Werte nicht aus {0, 1, NA}Binärvariable %s als intervallskaliert behandeltClusterung 'x' und Unähnlichkeitsmatrix 'dist' sind inkompatibeleinige negative Wahrscheinlichkeiten oder alle zu 0 berechnetellipsoidPoints() noch nicht für Dimensionen p>=3 implementiertFehler aus .C(cl_pam, *): unzulässige medID'sdie volle Silhoutte ist nur verfügbar für Resultate von 'clara(*, keep.data=TRUE)'unzulässige %s; muss eine benannte Liste seinunzulässiger 'jstop' aus .C(cl_clara,.):unzulässiges 'silhouette' Objektunzulässiges 'twins'-Objektunzulässige Clustermethodeunzulässiges Partitionsobjektunzulässige Silhouttenstrukturungültiger Typ %s für Spaltennummern %sbenötige zum Clustern mindestens 2 Objekteweder diss noch data für 'clusplot()' gefundenweder diss noch data gefunden, ebensowenig das ursprüngliche Argument von %skeine Punkte ohne fehlende WerteNAs ausgelasseneines oder mehrere Objekte enthalten nur fehlende Werteeine oder mehrere Variablen enthalten nur fehlende Wertesetze 'logical' Variable %s auf Typ 'asymm'setze 'logical' Variablen %s auf Typ 'asymm'die Mitgliedswerte sind alle sehr nah an 1/k. Evtl. 
'memb.exp' reduzieren?Die quadratische Matrix ist nicht symmetrisch.wenn 'medoids.x' FALSE ist, dann muss es auch 'keep.data' seinmit gemischten Variablen wird automatisch "gower" genutztx ist keine Datenmatrixx ist weder Dataframe noch numerische Matrixx ist weder numerischer Dataframe noch Matrixx ist nicht numerischx muss eine Matrix oder Dataframe seincluster/inst/CITATION0000644000175100001440000000240212460230700014053 0ustar hornikusers## -*- R -*- citHeader("To cite the R package 'cluster' in publications use:") year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date) vers <- paste("R package version", meta$Version) citEntry(entry = "Manual", title = "cluster: Cluster Analysis Basics and Extensions", author = c( person("Martin", "Maechler", email="maechler@stat.math.ethz.ch", role = c("aut", "cre"), comment = "enhancements, speed improvements, bug fixes, since 2000"), person("Peter", "Rousseeuw", email="rousse@uia.ua.ac.be", role="aut"), person("Anja", "Struyf", email="Anja.Struyf@uia.ua.ac.be", role="aut"), person("Mia", "Hubert", email="Mia.Hubert@uia.ua.ac.be", role="aut"), person("Kurt", "Hornik", role=c("trl","ctb"), comment = "R port; and much initial help file fixing, 1998--2000") ), year = year, note = paste(vers,"---", "For new features, see the 'Changelog' file (in the package source)"), ## FIXME: rather give the URL to the manual on CRAN ??? ## url = "http://stat.ethz.ch/CRAN/src/contrib/........", textVersion = paste( "Maechler, M., Rousseeuw, P., Struyf, A., Hubert, M., Hornik, K.(", year, "). cluster: Cluster Analysis Basics and Extensions. ", vers, ".", sep="")) cluster/inst/NEWS.Rd0000644000175100001440000001565712553131017014005 0ustar hornikusers% Check from R: % news(db = tools:::.build_news_db_from_package_NEWS_Rd("~/R/Pkgs/cluster/inst/NEWS.Rd"))! 
\name{NEWS} \title{News for \R Package \pkg{cluster}}% MM: look into ../svn-log-from.all \encoding{UTF-8} \newcommand{\CRANpkg}{\href{http://CRAN.R-project.org/package=#1}{\pkg{#1}}} %% NB: The date (yyyy-mm-dd) is the "Packaged:" date in ../DESCRIPTION \section{Changes in version 2.0.3 (2015-07-20, svn r6985)}{ \subsection{New Features}{ \itemize{ \item This new \file{NEWS.Rd} file -- going to replace \file{ChangeLog} eventually. } } \subsection{Bug Fixes}{ \itemize{ \item import all we need (but not more) from the "base" pkgs (stats, graphics, ...). } } } \section{Changes in version 2.0.2 (2015-06-18, svn r6955)}{ \subsection{New Features}{ \itemize{ \item using new \code{anyNA()} where appropriate. \item New Korean translations, thanks to Chel Hee Lee. \item \code{plotpart()}: \code{cmdscale()} tweaks. } } \subsection{Bug Fixes}{ \itemize{ \item valgrind detected missing allocation (\code{nisol["1"]} for k=1). \item typo R/daisy.q (R bug \PR{16430}). } } } \section{Changes in version 2.0.1 (2015-01-31, svn r6877)}{ \subsection{Bug Fixes}{ \itemize{ \item Fix \code{silhouette( obj )} for \code{obj <- pam(x, k = 1)}. } } } \section{Changes in version 2.0.0 (2015-01-29, svn r6874)}{ \subsection{New Features}{ \itemize{ \item \code{pam()} now using \code{.Call()} instead of \code{.C()} is potentially considerably more efficient. \item \code{agnes()} has improved \code{trace} behaviour; also, some invalid \code{par.method = *} settings now give an early and understandable error message. \item \code{lower.to.upper.tri.inds()} (etc) now returns \code{integer}. } } \subsection{Bug Fixes}{ \itemize{ \item \code{.C(..)} and \code{.Fortran(..)}: no longer using \code{DUP=FALSE} as that has become deprecated. } } } \section{Changes in version 1.15.3 (2014-09-04, svn r6804)}{ \subsection{New Features}{ \itemize{ \item \code{agnes()} and \code{diana()} finally get, respectively work with a \code{trace.lev} option. 
\item \code{plot.(agnes|diana)()} now deals well with long \code{call}s, by using multiple title lines. \item Message translations now also for C level error messages. } } \subsection{Bug Fixes}{ \itemize{ \item \code{agnes(*, method="flexible", par.method = c(a1, a2, b, c))}, i.e., \code{length(alpha) == 4}, finally works \emph{correctly}. } } } \section{Changes in version 1.15.2 (2014-03-31, svn r6724)}{ \subsection{New Features}{ \itemize{ \item Rewrote parts of the R level messages so they are more easily translatable, thanks to proposals by Lukasz Daniel. \item French translations from Philippe Grosjean. } } } \section{Changes in version 1.15.1 (2014-03-13, svn r6676)}{ \subsection{Bug Fixes}{ \itemize{ \item \code{mona} example not working in \R < 3.0.x. } } } \section{Changes in version 1.15.0 (2014-03-11, svn r6672)}{ \subsection{New Features}{ \itemize{ \item \code{agnes(*, method = "gaverage")} contributed by Pierre Roudier. \item documentation improvements; \item better translatable messages and translation updates. } } } %% ============================== FIXME =========================== %% ~~~~~~~~~ %% use ../ChangeLog %% ~~~~~~~~~ %% and then %% %% use ../svn-log-from.all %% ~~~~~~~~~~~~~~~~ %% and ../../cluster_Archive.lst {~= CRAN src/contrib/Archive/cluster/ : %% \section{Changes in version 1.14.4 (2013-03-26, svn r....)}{ \subsection{New Features}{ \itemize{ \item - } } \subsection{Bug Fixes}{ \itemize{ \item - } } } \section{Changes in version 1.14.3 (2012-10-14, svn r....)}{ \subsection{New Features}{ \itemize{ \item Polnish translations from Lukasz Daniel. } } \subsection{Bug Fixes}{ \itemize{ \item - } } } \section{Changes in version 1.14.2 (2012-02-06, svn r....)}{ \subsection{New Features}{ \itemize{ \item New \code{clusGap()} to compute the \dQuote{cluster Gap} goodness-of-fit statistic. 
} } \subsection{Bug Fixes}{ \itemize{ \item - } } } \section{Changes in version 1.14.1 (2011-10-16, svn r....)}{ \subsection{New Features}{ \itemize{ \item First translations (into German, thanks to Detlef Steuer). \item better \code{citation("cluster")} } } \subsection{Bug Fixes}{ \itemize{ \item \code{plot.silhouette(..., col = )} had ordering bug. } } } %% 253036 Mar 26 2013 cluster_1.14.4.tar.gz %% 259743 Oct 14 2012 cluster_1.14.3.tar.gz %% 250936 Feb 8 2012 cluster_1.14.2.tar.gz %% 246438 Oct 17 2011 cluster_1.14.1.tar.gz %% 226332 Jun 8 2011 cluster_1.14.0.tar.gz %% 214765 Feb 21 2011 cluster_1.13.3.tar.gz %% 213663 Nov 10 2010 cluster_1.13.2.tar.gz %% 214083 Jun 25 2010 cluster_1.13.1.tar.gz %% 214677 Apr 2 2010 cluster_1.12.3.tar.gz %% 214577 Oct 6 2009 cluster_1.12.1.tar.gz %% 215041 May 13 2009 cluster_1.12.0.tar.gz %% ============================== FIXME =========================== %% \section{Version 0.2-4}{ %% \subsection{..., 0.3-1, 0.3-n (n=3,5,...,26): 22 more CRAN releases}{ %% \itemize{ \item ............................................. } }} % How can I add vertical space ? % \preformatted{} is not allowed, nor is \cr \section{Version 1.2-0 (1999-04-11)}{ \subsection{First CRAN release of the \pkg{cluster} package, by Kurt Hornik}{ \itemize{ \item Martin Maechler had its own version independently. \item Both closely modeled after \code{clus} the tarball off JSS. }} \subsection{R Functions -- Fortran Files}{ \itemize{ \item \code{agnes()} -- \file{twins.f} for the \dQuote{twins} \code{agnes} and \code{diana}. 
\item \code{clara()} -- \code{clara.f} \item \code{daisy()} -- \file{daisy.f} (and \file{meet.f}) \item \code{diana()} -- (twins.f) \item \code{fanny()} -- \file{fanny.f} \item \code{mona()} -- \file{mona.f} \item \code{pam()} -- \file{pam.f} } } \subsection{Data Sets}{ \itemize{ \item agriculture \item animals \item flower \item ruspini \item votes.repub } } \subsection{Further Features}{ \itemize{ \item all Examples in \file{man/*.Rd} hand edited to become executable. \item \code{summary()}, \code{print()} (and \code{print.summary.**()} methods) for the six basic \R functions above. } } } \section{Version 1.1-2 (1998-06-16)}{ \subsection{Renamed previous \pkg{clus} to \pkg{cluster}}{ \itemize{ \item . } } } \section{Version 1.1-1 (1998-06-15)}{ \subsection{New Features}{ \itemize{ \item started \file{ChangeLog} } } } cluster/tests/0000755000175100001440000000000012553131040013105 5ustar hornikuserscluster/tests/mona.Rout.save0000644000175100001440000000646312124335144015665 0ustar hornikusers R version 3.0.0 beta (2013-03-25 r62402) -- "Masked Marvel" Copyright (C) 2013 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. 
> library(cluster) > > data(animals) > (mani <- mona(animals)) Revised data: war fly ver end gro hai ant 0 0 0 0 1 0 bee 0 1 0 0 1 1 cat 1 0 1 0 0 1 cpl 0 0 0 0 0 1 chi 1 0 1 1 1 1 cow 1 0 1 0 1 1 duc 1 1 1 0 1 0 eag 1 1 1 1 0 0 ele 1 0 1 1 1 0 fly 0 1 0 0 0 0 fro 0 0 1 1 0 0 her 0 0 1 0 1 0 lio 1 0 1 1 1 1 liz 0 0 1 0 0 0 lob 0 0 0 0 0 0 man 1 0 1 1 1 1 rab 1 0 1 0 1 1 sal 0 0 1 0 0 0 spi 0 0 0 0 0 1 wha 1 0 1 1 1 0 Order of objects: [1] ant cpl spi lob bee fly fro her liz sal cat cow rab chi lio man ele wha duc [20] eag Variable used: [1] gro NULL hai fly gro ver end gro NULL war gro NULL end NULL NULL [16] hai NULL fly end Separation step: [1] 4 0 5 3 4 2 3 4 0 1 4 0 3 0 0 4 0 2 3 Available components: [1] "data" "order" "variable" "step" "call" "order.lab" > > str(mani) List of 6 $ data : int [1:20, 1:6] 0 0 1 0 1 1 1 1 1 0 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:20] "ant" "bee" "cat" "cpl" ... .. ..$ : chr [1:6] "war" "fly" "ver" "end" ... $ order : int [1:20] 1 4 19 15 2 10 11 12 14 18 ... $ variable : chr [1:19] "gro" "NULL" "hai" "fly" ... $ step : int [1:19] 4 0 5 3 4 2 3 4 0 1 ... $ call : language mona(x = animals) $ order.lab: chr [1:20] "ant" "cpl" "spi" "lob" ... - attr(*, "class")= chr "mona" > > if(require(MASS)) { + + if(R.version$major != "1" || as.numeric(R.version$minor) >= 7) + RNGversion("1.6") + set.seed(253) + n <- 512; p <- 3 + Sig <- diag(p); Sig[] <- 0.8 ^ abs(col(Sig) - row(Sig)) + x3 <- mvrnorm(n, rep(0,p), Sig) >= 0 + x <- cbind(x3, rbinom(n, size=1, prob = 1/2)) + + print(sapply(as.data.frame(x), table)) + + mx <- mona(x) + str(mx) + print(lapply(mx[c(1,3,4)], table)) + } Loading required package: MASS V1 V2 V3 V4 0 244 245 261 238 1 268 267 251 274 List of 5 $ data : int [1:512, 1:4] 0 0 0 0 1 1 1 0 1 0 ... $ order : int [1:512] 1 137 154 204 353 398 30 52 69 85 ... $ variable: int [1:511] 0 0 0 0 0 3 0 0 0 0 ... $ step : int [1:511] 0 0 0 0 0 4 0 0 0 0 ... 
$ call : language mona(x = x) - attr(*, "class")= chr "mona" $data 0 1 988 1060 $variable 0 1 2 3 4 496 4 1 5 5 $step 0 1 2 3 4 496 1 2 4 8 Warning message: In RNGkind("Marsaglia-Multicarry", "Buggy Kinderman-Ramage") : buggy version of Kinderman-Ramage generator used > > proc.time() user system elapsed 0.350 0.054 0.504 cluster/tests/ellipsoid-ex.Rout.save0000644000175100001440000001454412540507660017336 0ustar hornikusers R version 3.1.0 alpha (2014-03-26 r65294) Copyright (C) 2014 The R Foundation for Statistical Computing Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(cluster) > > eh <- ellipsoidhull(cbind(x=1:4, y = 1:4)) #singular Error in Fortran routine computing the spanning ellipsoid, probably collinear data Warning message: In ellipsoidhull(cbind(x = 1:4, y = 1:4)) : algorithm possibly not converged in 5000 iterations > eh ## center ok, shape "0 volume" --> Warning 'ellipsoid' in 2 dimensions: center = ( 2.5 2.5 ); squared ave.radius d^2 = 0 and shape matrix = x y x 1.25 1.25 y 1.25 1.25 hence, area = 0 ** Warning: ** the algorithm did not terminate reliably! most probably because of collinear data > > set.seed(157) > for(n in 4:10) { ## n=2 and 3 still differ -- platform dependently! 
+ cat("n = ",n,"\n") + x2 <- rnorm(n) + print(ellipsoidhull(cbind(1:n, x2))) + print(ellipsoidhull(cbind(1:n, x2, 4*x2 + rnorm(n)))) + } n = 4 'ellipsoid' in 2 dimensions: center = ( 2.66215 0.82086 ); squared ave.radius d^2 = 2 and shape matrix = x2 1.55901 0.91804 x2 0.91804 0.67732 hence, area = 2.9008 'ellipsoid' in 3 dimensions: center = ( 2.50000 0.74629 2.95583 ); squared ave.radius d^2 = 3 and shape matrix = x2 1.25000 0.72591 1.8427 x2 0.72591 0.52562 1.5159 1.84268 1.51588 4.7918 hence, volume = 1.3843 n = 5 'ellipsoid' in 2 dimensions: center = ( 3.0726 1.2307 ); squared ave.radius d^2 = 2 and shape matrix = x2 2.21414 0.45527 x2 0.45527 2.39853 hence, area = 14.194 'ellipsoid' in 3 dimensions: center = ( 2.7989 1.1654 4.6782 ); squared ave.radius d^2 = 3 and shape matrix = x2 1.92664 0.40109 1.4317 x2 0.40109 1.76625 6.9793 1.43170 6.97928 28.0530 hence, volume = 11.532 n = 6 'ellipsoid' in 2 dimensions: center = ( 3.04367 0.97016 ); squared ave.radius d^2 = 2 and shape matrix = x2 4.39182 0.30833 x2 0.30833 0.59967 hence, area = 10.011 'ellipsoid' in 3 dimensions: center = ( 3.3190 0.7678 3.2037 ); squared ave.radius d^2 = 3 and shape matrix = x2 2.786928 -0.044373 -1.1467 x2 -0.044373 0.559495 1.5496 -1.146728 1.549620 5.5025 hence, volume = 10.741 n = 7 'ellipsoid' in 2 dimensions: center = ( 3.98294 -0.16567 ); squared ave.radius d^2 = 2 and shape matrix = x2 4.62064 -0.83135 x2 -0.83135 0.37030 hence, area = 6.3453 'ellipsoid' in 3 dimensions: center = ( 4.24890 -0.25918 -0.76499 ); squared ave.radius d^2 = 3 and shape matrix = x2 4.6494 -0.93240 -4.0758 x2 -0.9324 0.39866 1.9725 -4.0758 1.97253 10.4366 hence, volume = 6.9939 n = 8 'ellipsoid' in 2 dimensions: center = ( 3.6699 -0.4532 ); squared ave.radius d^2 = 2 and shape matrix = x2 9.4327 -2.5269 x2 -2.5269 3.7270 hence, area = 33.702 'ellipsoid' in 3 dimensions: center = ( 4.22030 -0.37953 -1.53922 ); squared ave.radius d^2 = 3 and shape matrix = x2 7.5211 -1.4804 -6.6587 x2 -1.4804 2.6972 
11.8198 -6.6587 11.8198 52.6243 hence, volume = 36.383 n = 9 'ellipsoid' in 2 dimensions: center = ( 5.324396 -0.037779 ); squared ave.radius d^2 = 2 and shape matrix = x2 10.1098 -1.3708 x2 -1.3708 2.1341 hence, area = 27.885 'ellipsoid' in 3 dimensions: center = ( 5.44700 -0.12504 -1.13538 ); squared ave.radius d^2 = 3 and shape matrix = x2 7.0364 -1.2424 -5.5741 x2 -1.2424 1.7652 7.3654 -5.5741 7.3654 31.5558 hence, volume = 27.782 n = 10 'ellipsoid' in 2 dimensions: center = ( 4.85439 0.28401 ); squared ave.radius d^2 = 2 and shape matrix = x2 13.932 0.64900 x2 0.649 0.95132 hence, area = 22.508 'ellipsoid' in 3 dimensions: center = ( 5.12537 0.25024 0.86441 ); squared ave.radius d^2 = 3 and shape matrix = x2 9.29343 0.56973 1.4143 x2 0.56973 0.76519 1.8941 1.41427 1.89409 6.3803 hence, volume = 31.936 > > set.seed(1) > x <- rt(100, df = 4) > y <- 100 + 5 * x + rnorm(100) > ellipsoidhull(cbind(x,y)) 'ellipsoid' in 2 dimensions: center = ( -1.3874 93.0589 ); squared ave.radius d^2 = 2 and shape matrix = x y x 32.924 160.54 y 160.543 785.88 hence, area = 62.993 > z <- 10 - 8 * x + y + rnorm(100) > (e3 <- ellipsoidhull(cbind(x,y,z))) 'ellipsoid' in 3 dimensions: center = ( -0.71678 96.09950 111.61029 ); squared ave.radius d^2 = 3 and shape matrix = x y z x 26.005 126.41 -80.284 y 126.410 616.94 -387.459 z -80.284 -387.46 254.006 hence, volume = 130.45 > d3o <- cbind(x,y + rt(100,3), 2 * x^2 + rt(100, 2)) > (e. 
<- ellipsoidhull(d3o, ret.sq = TRUE)) 'ellipsoid' in 3 dimensions: center = ( 0.32491 101.68998 39.48045 ); squared ave.radius d^2 = 3 and shape matrix = x x 19.655 94.364 48.739 94.364 490.860 181.022 48.739 181.022 1551.980 hence, volume = 9463.8 > stopifnot(all.equal(e.$sqdist, + with(e., mahalanobis(d3o, center=loc, cov=cov)), + tol = 1e-13)) > d5 <- cbind(d3o, 2*abs(y)^1.5 + rt(100,3), 3*x - sqrt(abs(y))) > (e5 <- ellipsoidhull(d5, ret.sq = TRUE)) 'ellipsoid' in 5 dimensions: center = ( -0.32451 98.54780 37.33619 1973.88383 -10.81891 ); squared ave.radius d^2 = 5 and shape matrix = x x 17.8372 87.0277 8.3389 2607.9 49.117 87.0277 446.9453 -2.0502 12745.4 239.470 8.3389 -2.0502 1192.8439 2447.8 24.458 2607.9264 12745.3826 2447.8006 384472.1 7179.239 49.1172 239.4703 24.4582 7179.2 135.260 hence, volume = 10218 > tail(sort(e5$sqdist)) ## 4 values 5.00039 ... 5.0099 [1] 4.999915 5.000005 5.000010 5.000088 5.001444 5.009849 > > proc.time() user system elapsed 0.218 0.023 0.223 cluster/tests/daisy-ex.Rout.save0000644000175100001440000012367111646600064016463 0ustar hornikusers R version 2.14.0 alpha (2011-10-06 r57181) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. 
> ## For different cluster versions > > require(cluster) Loading required package: cluster > > if(interactive()) { + (pkgPath <- .find.package("cluster", verbose = TRUE)) + (verC <- readLines(Dfile <- file.path(pkgPath, "DESCRIPTION"), n = 2)[2]) + } > > ## trivial cases should 'work': > daisy(cbind(1)) Dissimilarities : dissimilarity(0) Metric : euclidean Number of objects : 1 > (d10 <- daisy(matrix(0., 1,0))); str(d10) Dissimilarities : dissimilarity(0) Metric : euclidean Number of objects : 1 Classes 'dissimilarity', 'dist' atomic (0) ..- attr(*, "Size")= int 1 ..- attr(*, "Metric")= chr "euclidean" > d01 <- daisy(matrix(0., 0,1)) > if(paste(R.version$major, R.version$minor, sep=".") >= "2.1.0") + print(d01) Dissimilarities : dissimilarity(0) Metric : euclidean Number of objects : 0 > str(d01) Classes 'dissimilarity', 'dist' atomic (0) ..- attr(*, "Size")= int 0 ..- attr(*, "Metric")= chr "euclidean" > d32 <- data.frame(eins=c("A"=1,"B"=1,"C"=1), zwei=c(2,2,2)) > daisy(d32) Dissimilarities : A B B 0 C 0 0 Metric : euclidean Number of objects : 3 > daisy(d32, stand = TRUE) Dissimilarities : A B B 0 C 0 0 Metric : euclidean Number of objects : 3 Warning message: In daisy(d32, stand = TRUE) : 'x' has constant columns 1, 2; these are standardized to 0 > daisy(d32, type = list(ordratio="zwei")) Dissimilarities : A B B 0 C 0 0 Metric : mixed ; Types = I, T Number of objects : 3 > > > str(d5 <- data.frame(a= c(0, 0, 0,1,0,0, 0,0,1, 0,NA), + b= c(NA,0, 1,1,0,1, 0,1,0, 1,0), + c= c(0, 1, 1,0,1,NA,1,0,1, 0,NA), + d= c(1, 1, 0,1,0,0, 0,0,0, 1,0), + e= c(1, NA,0,1,0,0, 0,0,NA,1,1))) 'data.frame': 11 obs. of 5 variables: $ a: num 0 0 0 1 0 0 0 0 1 0 ... $ b: num NA 0 1 1 0 1 0 1 0 1 ... $ c: num 0 1 1 0 1 NA 1 0 1 0 ... $ d: num 1 1 0 1 0 0 0 0 0 1 ... $ e: num 1 NA 0 1 0 0 0 0 NA 1 ... 
> (d0 <- daisy(d5)) Dissimilarities : 1 2 3 4 5 6 7 8 2 1.290994 3 1.936492 1.581139 4 1.118034 1.936492 2.000000 5 1.936492 1.118034 1.000000 2.236068 6 1.825742 1.825742 0.000000 1.936492 1.118034 7 1.936492 1.118034 1.000000 2.236068 0.000000 1.118034 8 1.581139 1.936492 1.000000 1.732051 1.414214 0.000000 1.414214 9 2.236068 1.581139 1.581139 1.936492 1.118034 1.825742 1.118034 1.936492 10 0.000000 1.581139 1.732051 1.000000 2.000000 1.581139 2.000000 1.414214 11 1.581139 1.581139 1.825742 1.825742 1.290994 1.825742 1.290994 1.825742 9 10 2 3 4 5 6 7 8 9 10 2.236068 11 0.000000 1.825742 Metric : euclidean Number of objects : 11 Warning message: In daisy(d5) : binary variable(s) 1, 2, 3, 4, 5 treated as interval scaled > (d1 <- daisy(d5, type = list(asymm = 1:5))) Dissimilarities : 1 2 3 4 5 6 7 2 0.5000000 3 1.0000000 0.6666667 4 0.3333333 0.7500000 0.8000000 5 1.0000000 0.5000000 0.5000000 1.0000000 6 1.0000000 1.0000000 0.0000000 0.7500000 1.0000000 7 1.0000000 0.5000000 0.5000000 1.0000000 0.0000000 1.0000000 8 1.0000000 1.0000000 0.5000000 0.7500000 1.0000000 0.0000000 1.0000000 9 1.0000000 0.6666667 0.6666667 0.7500000 0.5000000 1.0000000 0.5000000 10 0.0000000 0.6666667 0.7500000 0.2500000 1.0000000 0.6666667 1.0000000 11 0.5000000 1.0000000 1.0000000 0.6666667 1.0000000 1.0000000 1.0000000 8 9 10 2 3 4 5 6 7 8 9 1.0000000 10 0.6666667 1.0000000 11 1.0000000 NA 0.6666667 Metric : mixed ; Types = A, A, A, A, A Number of objects : 11 > (d2 <- daisy(d5, type = list(symm = 1:2, asymm= 3:5))) Dissimilarities : 1 2 3 4 5 6 7 2 0.3333333 3 0.7500000 0.5000000 4 0.3333333 0.7500000 0.8000000 5 0.7500000 0.2500000 0.3333333 1.0000000 6 0.6666667 0.6666667 0.0000000 0.7500000 0.5000000 7 0.7500000 0.2500000 0.3333333 1.0000000 0.0000000 0.5000000 8 0.6666667 0.7500000 0.3333333 0.7500000 0.6666667 0.0000000 0.6666667 9 1.0000000 0.5000000 0.6666667 0.7500000 0.3333333 1.0000000 0.3333333 10 0.0000000 0.5000000 0.6000000 0.2500000 0.8000000 0.5000000 0.8000000 11 
0.5000000 0.5000000 1.0000000 0.6666667 0.5000000 1.0000000 0.5000000 8 9 10 2 3 4 5 6 7 8 9 1.0000000 10 0.5000000 1.0000000 11 1.0000000 0.0000000 0.6666667 Metric : mixed ; Types = S, S, A, A, A Number of objects : 11 > (d2.<- daisy(d5, type = list( asymm= 3:5))) Dissimilarities : 1 2 3 4 5 6 7 2 0.3333333 3 0.7500000 0.5000000 4 0.3333333 0.7500000 0.8000000 5 0.7500000 0.2500000 0.3333333 1.0000000 6 0.6666667 0.6666667 0.0000000 0.7500000 0.5000000 7 0.7500000 0.2500000 0.3333333 1.0000000 0.0000000 0.5000000 8 0.6666667 0.7500000 0.3333333 0.7500000 0.6666667 0.0000000 0.6666667 9 1.0000000 0.5000000 0.6666667 0.7500000 0.3333333 1.0000000 0.3333333 10 0.0000000 0.5000000 0.6000000 0.2500000 0.8000000 0.5000000 0.8000000 11 0.5000000 0.5000000 1.0000000 0.6666667 0.5000000 1.0000000 0.5000000 8 9 10 2 3 4 5 6 7 8 9 1.0000000 10 0.5000000 1.0000000 11 1.0000000 0.0000000 0.6666667 Metric : mixed ; Types = I, I, A, A, A Number of objects : 11 Warning message: In daisy(d5, type = list(asymm = 3:5)) : binary variable(s) 1, 2 treated as interval scaled > stopifnot(identical(c(d2), c(d2.))) > (dS <- daisy(d5, stand = TRUE))# gave error in some versions Dissimilarities : 1 2 3 4 5 6 7 8 2 2.614264 3 4.010913 3.291786 4 3.493856 4.725761 4.757684 5 4.010913 2.415752 2.000000 5.160965 6 3.823025 3.801028 0.000000 4.813384 2.236068 7 4.010913 2.415752 2.000000 5.160965 0.000000 2.236068 8 3.310837 3.995202 2.025000 4.305222 2.846160 0.000000 2.846160 9 5.558018 4.247692 4.148136 3.995202 3.493856 4.789855 3.493856 4.725761 10 0.000000 3.182103 3.587469 3.125000 4.107303 3.310837 4.107303 2.961302 11 3.416389 3.416389 3.674376 3.801028 2.614264 3.674376 2.614264 3.674376 9 10 2 3 4 5 6 7 8 9 10 5.307417 11 0.000000 3.801028 Metric : euclidean Number of objects : 11 Warning message: In daisy(d5, stand = TRUE) : binary variable(s) 1, 2, 3, 4, 5 treated as interval scaled > stopifnot(all.equal(as.vector(summary(c(dS), digits=9)), + c(0, 2.6142638, 3.4938562, 3.2933687, 
4.0591077, 5.5580177), + tol = 1e-7))# 7.88e-9 > > d5[,4] <- 1 # binary with only one instead of two values > (d0 <- daisy(d5)) Dissimilarities : 1 2 3 4 5 6 7 8 2 1.290994 3 1.581139 1.118034 4 1.118034 1.936492 1.732051 5 1.581139 0.000000 1.000000 2.000000 6 1.290994 1.290994 0.000000 1.581139 1.118034 7 1.581139 0.000000 1.000000 2.000000 0.000000 1.118034 8 1.118034 1.581139 1.000000 1.414214 1.414214 0.000000 1.414214 9 1.825742 1.118034 1.581139 1.581139 1.118034 1.825742 1.118034 1.936492 10 0.000000 1.581139 1.414214 1.000000 1.732051 1.118034 1.732051 1.000000 11 0.000000 0.000000 1.825742 1.290994 1.290994 1.825742 1.290994 1.825742 9 10 2 3 4 5 6 7 8 9 10 1.936492 11 0.000000 1.290994 Metric : euclidean Number of objects : 11 Warning message: In daisy(d5) : binary variable(s) 1, 2, 3, 5 treated as interval scaled > (d1 <- daisy(d5, type = list(asymm = 1:5)))# 2 NAs Dissimilarities : 1 2 3 4 5 6 7 2 1.0000000 3 1.0000000 0.5000000 4 0.5000000 1.0000000 0.7500000 5 1.0000000 0.0000000 0.5000000 1.0000000 6 1.0000000 1.0000000 0.0000000 0.6666667 1.0000000 7 1.0000000 0.0000000 0.5000000 1.0000000 0.0000000 1.0000000 8 1.0000000 1.0000000 0.5000000 0.6666667 1.0000000 0.0000000 1.0000000 9 1.0000000 0.5000000 0.6666667 0.6666667 0.5000000 1.0000000 0.5000000 10 0.0000000 1.0000000 0.6666667 0.3333333 1.0000000 0.5000000 1.0000000 11 0.0000000 NA 1.0000000 0.5000000 1.0000000 1.0000000 1.0000000 8 9 10 2 3 4 5 6 7 8 9 1.0000000 10 0.5000000 1.0000000 11 1.0000000 NA 0.5000000 Metric : mixed ; Types = A, A, A, A, A Number of objects : 11 Warning message: In daisy(d5, type = list(asymm = 1:5)) : at least one binary variable has not 2 different levels. 
> (d2 <- daisy(d5, type = list(symm = 1:2, asymm= 3:5))) Dissimilarities : 1 2 3 4 5 6 7 2 0.5000000 3 0.6666667 0.3333333 4 0.5000000 1.0000000 0.7500000 5 0.6666667 0.0000000 0.3333333 1.0000000 6 0.5000000 0.5000000 0.0000000 0.6666667 0.5000000 7 0.6666667 0.0000000 0.3333333 1.0000000 0.0000000 0.5000000 8 0.5000000 0.6666667 0.3333333 0.6666667 0.6666667 0.0000000 0.6666667 9 1.0000000 0.3333333 0.6666667 0.6666667 0.3333333 1.0000000 0.3333333 10 0.0000000 0.6666667 0.5000000 0.3333333 0.7500000 0.3333333 0.7500000 11 0.0000000 0.0000000 1.0000000 0.5000000 0.5000000 1.0000000 0.5000000 8 9 10 2 3 4 5 6 7 8 9 1.0000000 10 0.3333333 1.0000000 11 1.0000000 0.0000000 0.5000000 Metric : mixed ; Types = S, S, A, A, A Number of objects : 11 Warning message: In daisy(d5, type = list(symm = 1:2, asymm = 3:5)) : at least one binary variable has not 2 different levels. > (d2.<- daisy(d5, type = list( asymm= 3:5))) Dissimilarities : 1 2 3 4 5 6 7 2 0.5000000 3 0.6666667 0.3333333 4 0.5000000 1.0000000 0.7500000 5 0.6666667 0.0000000 0.3333333 1.0000000 6 0.5000000 0.5000000 0.0000000 0.6666667 0.5000000 7 0.6666667 0.0000000 0.3333333 1.0000000 0.0000000 0.5000000 8 0.5000000 0.6666667 0.3333333 0.6666667 0.6666667 0.0000000 0.6666667 9 1.0000000 0.3333333 0.6666667 0.6666667 0.3333333 1.0000000 0.3333333 10 0.0000000 0.6666667 0.5000000 0.3333333 0.7500000 0.3333333 0.7500000 11 0.0000000 0.0000000 1.0000000 0.5000000 0.5000000 1.0000000 0.5000000 8 9 10 2 3 4 5 6 7 8 9 1.0000000 10 0.3333333 1.0000000 11 1.0000000 0.0000000 0.5000000 Metric : mixed ; Types = I, I, A, A, A Number of objects : 11 Warning messages: 1: In daisy(d5, type = list(asymm = 3:5)) : at least one binary variable has not 2 different levels. 
2: In daisy(d5, type = list(asymm = 3:5)) : binary variable(s) 1, 2 treated as interval scaled > ## better leave away the constant variable: it has no effect: > stopifnot(identical(c(d1), c(daisy(d5[,-4], type = list(asymm = 1:4))))) > > ###---- Trivial "binary only" matrices (not data frames) did fail: > > x <- matrix(0, 2, 2) > dimnames(x)[[2]] <- c("A", "B")## colnames<- is missing in S+ > daisy(x, type = list(symm= "B", asymm="A")) Dissimilarities : 1 2 0 Metric : mixed ; Types = A, S Number of objects : 2 Warning message: In daisy(x, type = list(symm = "B", asymm = "A")) : at least one binary variable has not 2 different levels. > daisy(x, type = list(symm= "B"))# 0 too Dissimilarities : 1 2 0 Metric : mixed ; Types = I, S Number of objects : 2 Warning message: In daisy(x, type = list(symm = "B")) : at least one binary variable has not 2 different levels. > > x2 <- x; x2[2,2] <- 1 > daisy(x2, type= list(symm = "B"))# |-> 0.5 (gives 1 in S+) Dissimilarities : 1 2 0.5 Metric : mixed ; Types = I, S Number of objects : 2 > daisy(x2, type= list(symm = "B", asymm="A"))# 1 Dissimilarities : 1 2 1 Metric : mixed ; Types = A, S Number of objects : 2 Warning message: In daisy(x2, type = list(symm = "B", asymm = "A")) : at least one binary variable has not 2 different levels. 
> > x3 <- x; x3[] <- diag(2) > daisy(x3) # warning: both as interval scaled -> sqrt(2) Dissimilarities : 1 2 1.414214 Metric : euclidean Number of objects : 2 > daisy(x3, type= list(symm="B", asymm="A"))# 1 Dissimilarities : 1 2 1 Metric : mixed ; Types = A, S Number of objects : 2 > daisy(x3, type= list(symm =c("B","A"))) # 1, S+: sqrt(2) Dissimilarities : 1 2 1 Metric : mixed ; Types = S, S Number of objects : 2 > daisy(x3, type= list(asymm=c("B","A"))) # 1, S+ : sqrt(2) Dissimilarities : 1 2 1 Metric : mixed ; Types = A, A Number of objects : 2 > > x4 <- rbind(x3, 1) > daisy(x4, type= list(symm="B", asymm="A"))# 1 0.5 0.5 Dissimilarities : 1 2 2 1.0 3 0.5 0.5 Metric : mixed ; Types = A, S Number of objects : 3 > daisy(x4, type= list(symm=c("B","A"))) # dito; S+ : 1.41 1 1 Dissimilarities : 1 2 2 1.0 3 0.5 0.5 Metric : mixed ; Types = S, S Number of objects : 3 > daisy(x4, type= list(asymm=c("A","B"))) # dito, dito Dissimilarities : 1 2 2 1.0 3 0.5 0.5 Metric : mixed ; Types = A, A Number of objects : 3 > > > > ## ----------- example(daisy) ----------------------- > > data(flower) > data(agriculture) > > ## Example 1 in ref: > ## Dissimilarities using Euclidean metric and without standardization > (d.agr <- daisy(agriculture, metric = "euclidean", stand = FALSE)) Dissimilarities : B DK D GR E F IRL DK 5.408327 D 2.061553 3.405877 GR 22.339651 22.570113 22.661200 E 9.818350 11.182576 10.394710 12.567418 F 3.448188 3.512834 2.657066 20.100995 8.060397 IRL 12.747549 13.306014 13.080138 9.604166 3.140064 10.564563 I 5.803447 5.470832 5.423099 17.383325 5.727128 2.773085 7.920859 L 4.275512 2.220360 2.300000 24.035391 12.121056 4.060788 14.569145 NL 1.649242 5.096077 2.435159 20.752349 8.280097 2.202272 11.150785 P 17.236299 17.864490 17.664088 5.162364 7.430343 15.164432 4.601087 UK 2.828427 8.052950 4.850773 21.485344 8.984431 5.303772 12.103718 I L NL P DK D GR E F IRL I L 6.660330 NL 4.204759 4.669047 P 12.515990 19.168985 15.670673 UK 6.723095 7.102112 3.124100 
16.323296 Metric : euclidean Number of objects : 12 > (d.agr2 <- daisy(agriculture, metric = "manhattan")) Dissimilarities : B DK D GR E F IRL I L NL P DK 7.5 D 2.7 4.8 GR 30.4 31.9 31.5 E 13.6 15.1 14.7 16.8 F 4.3 3.8 3.4 28.1 11.3 IRL 17.2 18.7 18.3 13.2 3.6 14.9 I 6.0 7.5 7.1 24.4 7.6 3.7 11.2 L 5.0 2.5 2.3 33.8 17.0 5.7 20.6 9.4 NL 2.0 6.3 3.1 28.4 11.6 3.1 15.2 4.4 5.4 P 23.7 25.2 24.8 6.7 10.1 21.4 6.5 17.7 27.1 21.7 UK 3.2 10.7 5.9 28.0 11.2 7.5 14.8 8.8 8.2 4.4 21.3 Metric : manhattan Number of objects : 12 > > > ## Example 2 in ref > (dfl0 <- daisy(flower)) Dissimilarities : 1 2 3 4 5 6 7 2 0.8875408 3 0.5272467 0.5147059 4 0.3517974 0.5504493 0.5651552 5 0.4115605 0.6226307 0.3726307 0.6383578 6 0.2269199 0.6606209 0.3003268 0.4189951 0.3443627 7 0.2876225 0.5999183 0.4896242 0.3435866 0.4197712 0.1892974 8 0.4234069 0.4641340 0.6038399 0.2960376 0.4673203 0.5714869 0.4107843 9 0.5808824 0.4316585 0.4463644 0.8076797 0.3306781 0.5136846 0.5890931 10 0.6094363 0.4531046 0.4678105 0.5570670 0.3812908 0.4119281 0.5865196 11 0.3278595 0.7096814 0.5993873 0.6518791 0.3864788 0.4828840 0.5652369 12 0.4267565 0.5857843 0.6004902 0.5132761 0.5000817 0.5248366 0.6391340 13 0.5196487 0.5248366 0.5395425 0.7464461 0.2919118 0.4524510 0.5278595 14 0.2926062 0.5949346 0.6096405 0.3680147 0.5203431 0.3656863 0.5049837 15 0.6221814 0.3903595 0.5300654 0.5531454 0.4602124 0.5091503 0.3345588 16 0.6935866 0.3575163 0.6222222 0.3417892 0.7301471 0.5107843 0.4353758 17 0.7765114 0.1904412 0.5801471 0.4247141 0.6880719 0.5937092 0.5183007 18 0.4610294 0.4515114 0.7162173 0.4378268 0.4755310 0.6438317 0.4692402 8 9 10 11 12 13 14 2 3 4 5 6 7 8 9 0.6366422 10 0.6639706 0.4256127 11 0.4955474 0.4308007 0.3948121 12 0.4216503 0.4194036 0.3812092 0.2636029 13 0.5754085 0.2181781 0.3643791 0.3445670 0.2331699 14 0.4558007 0.4396650 0.3609477 0.2838644 0.1591503 0.3784314 15 0.4512255 0.2545343 0.4210784 0.4806781 0.4295752 0.3183007 0.4351307 16 0.6378268 0.6494690 0.3488562 
0.7436683 0.6050654 0.5882353 0.4598039 17 0.4707516 0.6073938 0.3067810 0.7015931 0.5629902 0.5461601 0.5427288 18 0.1417892 0.5198529 0.8057598 0.5359477 0.5495507 0.5733252 0.5698121 15 16 17 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 0.3949346 17 0.3528595 0.1670752 18 0.5096814 0.7796160 0.6125408 Metric : mixed ; Types = N, N, N, N, O, O, I, I Number of objects : 18 > stopifnot(identical(c(dfl0), + c(daisy(flower, type = list(symm = 1)))) && + identical(c(dfl0), + c(daisy(flower, type = list(symm = 2)))) && + identical(c(dfl0), + c(daisy(flower, type = list(symm = 3)))) && + identical(c(dfl0), + c(daisy(flower, type = list(symm = c(1,3))))) + ) > > (dfl1 <- daisy(flower, type = list(asymm = 3))) Dissimilarities : 1 2 3 4 5 6 7 2 0.8875408 3 0.5272467 0.5882353 4 0.3517974 0.5504493 0.5651552 5 0.4115605 0.7115780 0.4258637 0.6383578 6 0.2269199 0.7549953 0.3432306 0.4189951 0.3935574 7 0.2876225 0.6856209 0.5595705 0.3435866 0.4797386 0.2163399 8 0.4234069 0.4641340 0.6038399 0.2960376 0.4673203 0.5714869 0.4107843 9 0.5808824 0.4933240 0.5101307 0.8076797 0.3779178 0.5870682 0.6732493 10 0.6094363 0.5178338 0.5346405 0.5570670 0.4357610 0.4707750 0.6703081 11 0.3278595 0.7096814 0.5993873 0.6518791 0.3864788 0.4828840 0.5652369 12 0.4267565 0.5857843 0.6004902 0.5132761 0.5000817 0.5248366 0.6391340 13 0.5196487 0.5998133 0.6166200 0.7464461 0.3336134 0.5170868 0.6032680 14 0.2926062 0.5949346 0.6096405 0.3680147 0.5203431 0.3656863 0.5049837 15 0.6221814 0.4461251 0.6057890 0.5531454 0.5259570 0.5818861 0.3823529 16 0.6935866 0.4085901 0.7111111 0.3417892 0.8344538 0.5837535 0.4975724 17 0.7765114 0.2176471 0.6630252 0.4247141 0.7863679 0.6785247 0.5923436 18 0.4610294 0.4515114 0.7162173 0.4378268 0.4755310 0.6438317 0.4692402 8 9 10 11 12 13 14 2 3 4 5 6 7 8 9 0.6366422 10 0.6639706 0.4864146 11 0.4955474 0.4308007 0.3948121 12 0.4216503 0.4194036 0.3812092 0.2636029 13 0.5754085 0.2493464 0.4164332 0.3445670 0.2331699 14 0.4558007 0.4396650 0.3609477 0.2838644 
0.1591503 0.3784314 15 0.4512255 0.2908964 0.4812325 0.4806781 0.4295752 0.3637722 0.4351307 16 0.6378268 0.7422502 0.3986928 0.7436683 0.6050654 0.6722689 0.4598039 17 0.4707516 0.6941643 0.3506069 0.7015931 0.5629902 0.6241830 0.5427288 18 0.1417892 0.5198529 0.8057598 0.5359477 0.5495507 0.5733252 0.5698121 15 16 17 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 0.4513539 17 0.4032680 0.1909430 18 0.5096814 0.7796160 0.6125408 Metric : mixed ; Types = N, N, A, N, O, O, I, I Number of objects : 18 > (dfl2 <- daisy(flower, type = list(asymm = c(1, 3), ordratio = 7))) Dissimilarities : 1 2 3 4 5 6 7 2 0.9007353 3 0.6176471 0.5882353 4 0.4226891 0.5455882 0.6403361 5 0.4806723 0.7369748 0.5264706 0.7605042 6 0.2823529 0.7470588 0.3911765 0.4764706 0.4980392 7 0.3310924 0.6983193 0.6676471 0.4109244 0.5745098 0.2764706 8 0.5100840 0.4544118 0.6789916 0.3327731 0.5705882 0.6563025 0.4932773 9 0.5808824 0.5084034 0.5252101 0.8257353 0.3882353 0.6100840 0.6756303 10 0.6323529 0.5067227 0.5235294 0.5522059 0.4722689 0.4739496 0.6941176 11 0.3389706 0.7117647 0.6014706 0.6588235 0.4066176 0.4919118 0.5742647 12 0.4441176 0.5816176 0.5963235 0.5139706 0.5264706 0.5220588 0.6544118 13 0.5286765 0.6252101 0.6420168 0.7735294 0.3336134 0.5504202 0.6159664 14 0.3044118 0.5963235 0.6110294 0.3742647 0.5411765 0.3573529 0.5147059 15 0.6242647 0.4588235 0.6184874 0.5691176 0.5386555 0.6025210 0.3823529 16 0.6845588 0.3831933 0.6857143 0.3147059 0.8344538 0.5504202 0.4848739 17 0.7897059 0.2176471 0.6630252 0.4198529 0.8117647 0.6705882 0.6050420 18 0.5268908 0.4647059 0.8336134 0.5210084 0.5537815 0.7588235 0.5386555 8 9 10 11 12 13 14 2 3 4 5 6 7 8 9 0.6595588 10 0.6639706 0.5126050 11 0.5073529 0.4419118 0.4066176 12 0.4272059 0.4367647 0.3867647 0.2698529 13 0.6073529 0.2596639 0.4529412 0.3647059 0.2595588 14 0.4669118 0.4514706 0.3720588 0.2845588 0.1647059 0.3992647 15 0.4720588 0.2932773 0.5050420 0.4897059 0.4448529 0.3764706 0.4448529 16 0.6058824 0.7319328 0.3621849 0.7235294 
0.5786765 0.6722689 0.4389706 17 0.4610294 0.7092437 0.3394958 0.7036765 0.5588235 0.6495798 0.5441176 18 0.1882353 0.5198529 0.8286765 0.5470588 0.5669118 0.5823529 0.5816176 15 16 17 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 0.4386555 17 0.4159664 0.1655462 18 0.5117647 0.7705882 0.6257353 Metric : mixed ; Types = A, N, A, N, O, O, T, I Number of objects : 18 > (dfl3 <- daisy(flower, type = list(asymm = 1:3))) Dissimilarities : 1 2 3 4 5 6 7 2 0.8875408 3 0.6025677 0.5882353 4 0.4020542 0.6290850 0.6458917 5 0.4703548 0.7115780 0.4968410 0.7295518 6 0.2593371 0.7549953 0.4004357 0.4788515 0.4591503 7 0.3287115 0.7998911 0.6528322 0.4581155 0.5596950 0.2523965 8 0.4838936 0.5304388 0.6901027 0.3947168 0.5340803 0.6531279 0.5477124 9 0.5808824 0.4933240 0.5101307 0.8076797 0.3779178 0.5870682 0.6732493 10 0.6094363 0.5178338 0.5346405 0.5570670 0.4357610 0.4707750 0.6703081 11 0.3278595 0.7096814 0.5993873 0.6518791 0.3864788 0.4828840 0.5652369 12 0.4267565 0.5857843 0.6004902 0.5132761 0.5000817 0.5248366 0.6391340 13 0.5196487 0.5998133 0.6166200 0.7464461 0.3336134 0.5170868 0.6032680 14 0.2926062 0.5949346 0.6096405 0.3680147 0.5203431 0.3656863 0.5049837 15 0.6221814 0.5204793 0.6057890 0.6321662 0.5259570 0.5818861 0.4460784 16 0.6935866 0.4766885 0.7111111 0.3906162 0.8344538 0.5837535 0.5805011 17 0.7765114 0.2539216 0.6630252 0.4853875 0.7863679 0.6785247 0.6910675 18 0.5268908 0.5160131 0.8185341 0.5837691 0.5434641 0.7358077 0.6256536 8 9 10 11 12 13 14 2 3 4 5 6 7 8 9 0.6366422 10 0.6639706 0.4864146 11 0.4955474 0.4308007 0.3948121 12 0.4216503 0.4194036 0.3812092 0.2636029 13 0.5754085 0.2493464 0.4164332 0.3445670 0.2331699 14 0.4558007 0.4396650 0.3609477 0.2838644 0.1591503 0.3784314 15 0.5156863 0.2908964 0.4812325 0.4806781 0.4295752 0.3637722 0.4351307 16 0.7289449 0.7422502 0.3986928 0.7436683 0.6050654 0.6722689 0.4598039 17 0.5380019 0.6941643 0.3506069 0.7015931 0.5629902 0.6241830 0.5427288 18 0.1890523 0.5198529 0.8057598 0.5359477 0.5495507 
0.5733252 0.5698121 15 16 17 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 0.5265795 17 0.4704793 0.2227669 18 0.5824930 0.8909897 0.7000467 Metric : mixed ; Types = A, A, A, N, O, O, I, I Number of objects : 18 > > ## --- animals > data(animals) > d0 <- daisy(animals) Warning message: In daisy(animals) : binary variable(s) 1, 2, 3, 4, 5, 6 treated as interval scaled > > d1 <- daisy(animals - 1, type=list(asymm=c(2,4))) Warning message: In daisy(animals - 1, type = list(asymm = c(2, 4))) : binary variable(s) 1, 3, 5, 6 treated as interval scaled > (d2 <- daisy(animals - 1, type=list(symm = c(1,3,5,6), asymm=c(2,4)))) Dissimilarities : ant bee cat cpl chi cow duc bee 0.4000000 cat 1.0000000 0.8000000 cpl 0.5000000 0.4000000 0.5000000 chi 0.8000000 0.6666667 0.4000000 0.8000000 cow 0.7500000 0.6000000 0.2500000 0.7500000 0.2000000 duc 0.6000000 0.6000000 0.6000000 1.0000000 0.5000000 0.4000000 eag 0.8333333 0.8333333 0.5000000 0.8333333 0.5000000 0.6666667 0.3333333 ele 0.6000000 0.8333333 0.6000000 1.0000000 0.2000000 0.4000000 0.3333333 fly 0.4000000 0.4000000 0.8000000 0.4000000 1.0000000 1.0000000 0.6000000 fro 0.5000000 0.8000000 0.7500000 0.7500000 0.5000000 0.7500000 0.6000000 her 0.2500000 0.6000000 0.7500000 0.7500000 0.6000000 0.5000000 0.4000000 lio 0.7500000 0.6000000 0.2500000 0.7500000 0.0000000 0.0000000 0.4000000 liz 0.5000000 0.8000000 0.5000000 0.5000000 0.8000000 0.7500000 0.6000000 lob 0.0000000 0.5000000 1.0000000 0.3333333 1.0000000 1.0000000 0.7500000 man 0.8000000 0.6666667 0.4000000 0.8000000 0.0000000 0.2000000 0.5000000 rab 0.7500000 0.6000000 0.2500000 0.7500000 0.2000000 0.0000000 0.4000000 sal 0.3333333 0.7500000 0.6666667 0.6666667 0.7500000 0.6666667 0.5000000 spi 0.5000000 0.4000000 0.5000000 0.0000000 0.7500000 0.7500000 1.0000000 wha 0.6000000 0.8333333 0.6000000 1.0000000 0.2000000 0.4000000 0.3333333 eag ele fly fro her lio liz bee cat cpl chi cow duc eag ele 0.3333333 fly 0.5000000 0.8333333 fro 0.4000000 0.2500000 0.6000000 her 0.6666667 
0.4000000 0.6000000 0.2500000 lio 0.6000000 0.2500000 1.0000000 0.6666667 0.5000000 liz 0.5000000 0.6000000 0.4000000 0.2500000 0.2500000 0.7500000 lob 0.8000000 0.7500000 0.2500000 0.5000000 0.3333333 1.0000000 0.3333333 man 0.5000000 0.2000000 1.0000000 0.5000000 0.6000000 0.0000000 0.8000000 rab 0.6666667 0.4000000 1.0000000 0.7500000 0.5000000 0.0000000 0.7500000 sal 0.6000000 0.5000000 0.5000000 0.2500000 0.0000000 0.6666667 0.0000000 spi 0.8000000 1.0000000 0.4000000 0.6666667 0.7500000 0.7500000 0.5000000 wha 0.3333333 0.0000000 0.8333333 0.2500000 0.4000000 0.2500000 0.6000000 lob man rab sal spi bee cat cpl chi cow duc eag ele fly fro her lio liz lob man 1.0000000 rab 1.0000000 0.2000000 sal 0.3333333 0.7500000 0.6666667 spi 0.3333333 0.7500000 0.7500000 0.6666667 wha 0.7500000 0.2000000 0.4000000 0.5000000 1.0000000 Metric : mixed ; Types = S, A, S, A, S, S Number of objects : 20 > stopifnot(c(d1) == c(d2)) > > d3 <- daisy(2 - animals, type=list(asymm=c(2,4))) Warning message: In daisy(2 - animals, type = list(asymm = c(2, 4))) : binary variable(s) 1, 3, 5, 6 treated as interval scaled > (d4 <- daisy(2 - animals, type=list(symm = c(1,3,5,6), asymm=c(2,4)))) Dissimilarities : ant bee cat cpl chi cow duc bee 0.3333333 cat 0.6666667 0.6666667 cpl 0.3333333 0.3333333 0.3333333 chi 0.6666667 0.6666667 0.3333333 0.6666667 cow 0.5000000 0.5000000 0.1666667 0.5000000 0.1666667 duc 0.5000000 0.6000000 0.5000000 0.8333333 0.5000000 0.3333333 eag 0.8333333 1.0000000 0.5000000 0.8333333 0.6000000 0.6666667 0.4000000 ele 0.5000000 0.8333333 0.5000000 0.8333333 0.2000000 0.3333333 0.3333333 fly 0.3333333 0.4000000 0.6666667 0.3333333 1.0000000 0.8333333 0.6000000 fro 0.4000000 0.8000000 0.6000000 0.6000000 0.5000000 0.6000000 0.6000000 her 0.1666667 0.5000000 0.5000000 0.5000000 0.5000000 0.3333333 0.3333333 lio 0.6000000 0.6000000 0.2000000 0.6000000 0.0000000 0.0000000 0.4000000 liz 0.3333333 0.6666667 0.3333333 0.3333333 0.6666667 0.5000000 0.5000000 lob 0.0000000 
0.4000000 0.6000000 0.2000000 0.8000000 0.6000000 0.6000000 man 0.6666667 0.6666667 0.3333333 0.6666667 0.0000000 0.1666667 0.5000000 rab 0.5000000 0.5000000 0.1666667 0.5000000 0.1666667 0.0000000 0.3333333 sal 0.2000000 0.6000000 0.4000000 0.4000000 0.6000000 0.4000000 0.4000000 spi 0.4000000 0.4000000 0.4000000 0.0000000 0.6000000 0.6000000 1.0000000 wha 0.5000000 0.8333333 0.5000000 0.8333333 0.2000000 0.3333333 0.3333333 eag ele fly fro her lio liz bee cat cpl chi cow duc eag ele 0.4000000 fly 0.6000000 0.8333333 fro 0.5000000 0.2500000 0.6000000 her 0.6666667 0.3333333 0.5000000 0.2000000 lio 0.6000000 0.2000000 1.0000000 0.5000000 0.4000000 liz 0.5000000 0.5000000 0.3333333 0.2000000 0.1666667 0.6000000 lob 0.8000000 0.6000000 0.2000000 0.4000000 0.2000000 0.7500000 0.2000000 man 0.6000000 0.2000000 1.0000000 0.5000000 0.5000000 0.0000000 0.6666667 rab 0.6666667 0.3333333 0.8333333 0.6000000 0.3333333 0.0000000 0.5000000 sal 0.6000000 0.4000000 0.4000000 0.2000000 0.0000000 0.5000000 0.0000000 spi 0.8000000 0.8000000 0.4000000 0.5000000 0.6000000 0.6000000 0.4000000 wha 0.4000000 0.0000000 0.8333333 0.2500000 0.3333333 0.2000000 0.5000000 lob man rab sal spi bee cat cpl chi cow duc eag ele fly fro her lio liz lob man 0.8000000 rab 0.6000000 0.1666667 sal 0.2000000 0.6000000 0.4000000 spi 0.2500000 0.6000000 0.6000000 0.5000000 wha 0.6000000 0.2000000 0.3333333 0.4000000 0.8000000 Metric : mixed ; Types = S, A, S, A, S, S Number of objects : 20 > stopifnot(c(d3) == c(d4)) > > pairs(cbind(d0,d2,d4), + main = "Animals -- symmetric and asymm. dissimilarities") > cluster/tests/clara.Rout.save0000644000175100001440000011747712461440417016031 0ustar hornikusers R Under development (unstable) (2015-01-20 r67564) -- "Unsuffered Consequences" Copyright (C) 2015 The R Foundation for Statistical Computing Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. 
Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(cluster) > > ## generate 1500 objects, divided into 2 clusters. > set.seed(144) > x <- rbind(cbind(rnorm(700, 0,8), rnorm(700, 0,8)), + cbind(rnorm(800,50,8), rnorm(800,10,8))) > > isEq <- function(x,y, epsF = 100) + is.logical(r <- all.equal(x,y, tol = epsF * .Machine$double.eps)) && r > > .proctime00 <- proc.time() > > ## full size sample {should be = pam()}: > n0 <- length(iSml <- c(1:70, 701:720)) > summary(clara0 <- clara(x[iSml,], k = 2, sampsize = n0)) Object of class 'clara' from call: clara(x = x[iSml, ], k = 2, sampsize = n0) Medoids: [,1] [,2] [1,] -1.499522 -1.944452 [2,] 48.629631 12.998515 Objective function: 10.23588 Numerical information per cluster: size max_diss av_diss isolation [1,] 70 24.81995 10.25745 0.4744879 [2,] 20 19.07782 10.16040 0.3647145 Average silhouette width per cluster: [1] 0.7144587 0.7090915 Average silhouette width of best sample: 0.713266 Best sample: [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 [51] 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 [76] 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 Clustering vector: [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [39] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 [77] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 Silhouette plot information for best sample: cluster neighbor sil_width 45 1 2 0.8033727 60 1 2 0.8021017 55 1 2 0.8005931 66 1 2 0.8002776 58 1 2 0.7991899 11 1 2 0.7991773 41 1 2 0.7973302 26 1 2 0.7962397 63 1 2 0.7962229 13 1 2 
0.7949705 67 1 2 0.7942590 54 1 2 0.7936184 17 1 2 0.7916087 16 1 2 0.7913570 39 1 2 0.7912755 6 1 2 0.7840455 34 1 2 0.7833568 49 1 2 0.7819733 9 1 2 0.7789087 23 1 2 0.7785009 32 1 2 0.7757325 22 1 2 0.7655369 61 1 2 0.7639754 12 1 2 0.7639644 5 1 2 0.7606436 18 1 2 0.7579145 56 1 2 0.7566307 3 1 2 0.7537894 24 1 2 0.7531180 50 1 2 0.7517817 48 1 2 0.7501998 25 1 2 0.7499655 59 1 2 0.7472022 19 1 2 0.7445038 65 1 2 0.7398395 28 1 2 0.7377377 38 1 2 0.7370935 7 1 2 0.7335940 40 1 2 0.7310012 14 1 2 0.7294895 62 1 2 0.7254478 70 1 2 0.7163214 4 1 2 0.7157257 21 1 2 0.7148663 64 1 2 0.7108496 2 1 2 0.7062831 15 1 2 0.7015120 52 1 2 0.6978313 37 1 2 0.6954023 31 1 2 0.6932905 33 1 2 0.6888478 10 1 2 0.6805028 20 1 2 0.6766854 43 1 2 0.6761461 8 1 2 0.6749706 27 1 2 0.6671817 35 1 2 0.6632888 68 1 2 0.6587599 30 1 2 0.6554989 36 1 2 0.6228481 53 1 2 0.6203313 57 1 2 0.6191666 42 1 2 0.6142020 47 1 2 0.6024151 1 1 2 0.5814464 69 1 2 0.5091186 46 1 2 0.4961302 44 1 2 0.4849961 29 1 2 0.4569316 51 1 2 0.4230181 81 2 1 0.7965942 71 2 1 0.7961971 85 2 1 0.7919593 74 2 1 0.7869047 82 2 1 0.7795304 78 2 1 0.7788873 79 2 1 0.7729041 72 2 1 0.7492980 88 2 1 0.7447973 87 2 1 0.7404399 76 2 1 0.7352351 77 2 1 0.7216838 86 2 1 0.7165677 84 2 1 0.6952406 73 2 1 0.6942882 83 2 1 0.6621568 80 2 1 0.6368446 90 2 1 0.5743228 75 2 1 0.5597232 89 2 1 0.4482549 4005 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 
0.18647 11.58500 20.05800 27.81500 45.57800 85.23200 Metric : euclidean Number of objects : 90 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > pam0 <- pam (x[iSml,], k = 2) > stopifnot(identical(clara0$clustering, pam0$clustering) + , isEq(clara0$objective, unname(pam0$objective[2])) + ) > > summary(clara2 <- clara(x, 2)) Object of class 'clara' from call: clara(x = x, k = 2) Medoids: [,1] [,2] [1,] 2.012828 -1.896095 [2,] 51.494628 10.274769 Objective function: 10.23445 Numerical information per cluster: size max_diss av_diss isolation [1,] 700 36.84408 10.49814 0.7230478 [2,] 800 30.89896 10.00373 0.6063775 Average silhouette width per cluster: [1] 0.7562366 0.7203254 Average silhouette width of best sample: 0.733384 Best sample: [1] 21 23 50 97 142 168 191 192 197 224 325 328 433 458 471 [16] 651 712 714 722 797 805 837 909 919 926 999 1006 1018 1019 1049 [31] 1081 1084 1132 1144 1150 1201 1207 1250 1291 1307 1330 1374 1426 1428 Clustering vector: [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [186] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [223] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [260] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [297] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [334] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [371] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [408] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 
[445] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [482] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [519] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [556] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [593] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [630] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [667] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 [704] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [741] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [778] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [815] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [852] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [889] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [926] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [963] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1000] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1037] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1074] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1111] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1148] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1185] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1222] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1259] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1296] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1333] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 
2 2 2 2 2 [1370] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1407] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1444] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [1481] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 Silhouette plot information for best sample: cluster neighbor sil_width 325 1 2 0.8261589 191 1 2 0.8206687 23 1 2 0.8149640 97 1 2 0.8048084 433 1 2 0.8017745 458 1 2 0.8008324 471 1 2 0.7958547 328 1 2 0.7689099 142 1 2 0.7619508 21 1 2 0.7607528 197 1 2 0.7606641 50 1 2 0.7509131 192 1 2 0.7098473 651 1 2 0.7035969 224 1 2 0.6843886 168 1 2 0.5337006 1084 2 1 0.8180447 1081 2 1 0.8171686 1201 2 1 0.8170847 1291 2 1 0.8167148 1307 2 1 0.8166841 1144 2 1 0.8159947 999 2 1 0.8135303 1426 2 1 0.8023538 1049 2 1 0.8022891 1250 2 1 0.8014300 712 2 1 0.7859324 837 2 1 0.7792784 1018 2 1 0.7764837 919 2 1 0.7651939 1374 2 1 0.7648534 1428 2 1 0.7516819 1330 2 1 0.7505861 1006 2 1 0.7368113 714 2 1 0.7237565 1150 2 1 0.7046060 1132 2 1 0.6940608 909 2 1 0.6859682 926 2 1 0.6725631 722 2 1 0.6572791 797 2 1 0.6395698 1019 2 1 0.6083662 805 2 1 0.2814164 1207 2 1 0.2694097 946 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 0.48462 12.32300 26.49900 32.21300 52.39100 77.17500 Metric : euclidean Number of objects : 44 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > > clInd <- c("objective", "i.med", "medoids", "clusinfo") > clInS <- c(clInd, "sample") > ## clara() {as original code} always draws the *same* random samples !!!! 
> clara(x, 2, samples = 50)[clInd] $objective [1] 10.06735 $i.med [1] 177 1115 $medoids [,1] [,2] [1,] -0.2538744 -1.209148 [2,] 50.0372683 9.501125 $clusinfo size max_diss av_diss isolation [1,] 700 34.67208 10.193945 0.6743054 [2,] 800 29.51964 9.956571 0.5741003 > for(i in 1:20) + print(clara(x[sample(nrow(x)),], 2, samples = 50)[clInd]) $objective [1] 10.05727 $i.med [1] 936 192 $medoids [,1] [,2] [1,] 50.03726827 9.501124850 [2,] -0.03900399 -0.009078886 $clusinfo size max_diss av_diss isolation [1,] 800 29.51964 9.956571 0.5791419 [2,] 700 34.06055 10.172348 0.6682295 $objective [1] 10.05296 $i.med [1] 468 1394 $medoids [,1] [,2] [1,] -0.3292826 -0.2398794 [2,] 50.0372683 9.5011249 $clusinfo size max_diss av_diss isolation [1,] 700 33.98451 10.163128 0.6624677 [2,] 800 29.51964 9.956571 0.5754330 $objective [1] 10.05852 $i.med [1] 1171 379 $medoids [,1] [,2] [1,] 50.9444060 9.6723175 [2,] -0.3292826 -0.2398794 $clusinfo size max_diss av_diss isolation [1,] 800 30.10388 9.966988 0.5764486 [2,] 700 33.98451 10.163128 0.6507574 $objective [1] 10.07051 $i.med [1] 75 1254 $medoids [,1] [,2] [1,] -0.9493373 0.3552542 [2,] 50.5455985 9.3904972 $clusinfo size max_diss av_diss isolation [1,] 700 33.12704 10.191999 0.6336273 [2,] 800 29.66384 9.964205 0.5673860 $objective [1] 10.0613 $i.med [1] 199 134 $medoids [,1] [,2] [1,] -0.03900399 -0.009078886 [2,] 49.59384120 9.792964832 $clusinfo size max_diss av_diss isolation [1,] 700 34.06055 10.172348 0.6732466 [2,] 800 29.57491 9.964138 0.5845827 $objective [1] 10.06101 $i.med [1] 1453 1122 $medoids [,1] [,2] [1,] 50.0372683 9.50112485 [2,] -0.9691441 0.03342515 $clusinfo size max_diss av_diss isolation [1,] 800 29.51964 9.956571 0.5690241 [2,] 700 33.31923 10.180359 0.6422655 $objective [1] 10.08603 $i.med [1] 613 318 $medoids [,1] [,2] [1,] 50.0627056 9.478225 [2,] -0.2902194 1.026496 $clusinfo size max_diss av_diss isolation [1,] 800 29.51131 9.957225 0.5780037 [2,] 700 33.21560 10.233240 0.6505552 $objective [1] 
10.07293 $i.med [1] 618 406 $medoids [,1] [,2] [1,] 50.3621263 9.0207185 [2,] -0.2092816 -0.5916053 $clusinfo size max_diss av_diss isolation [1,] 800 29.25143 9.990206 0.5682446 [2,] 700 34.30301 10.167473 0.6663777 $objective [1] 10.0592 $i.med [1] 1279 1349 $medoids [,1] [,2] [1,] 50.1502433 10.60358224 [2,] -0.9691441 0.03342515 $clusinfo size max_diss av_diss isolation [1,] 800 30.54975 9.953191 0.5852356 [2,] 700 33.31923 10.180359 0.6382900 $objective [1] 10.06241 $i.med [1] 1293 21 $medoids [,1] [,2] [1,] 50.5809098 9.7418386 [2,] -0.9493373 0.3552542 $clusinfo size max_diss av_diss isolation [1,] 800 29.98892 9.949013 0.5725461 [2,] 700 33.12704 10.191999 0.6324587 $objective [1] 10.0592 $i.med [1] 337 675 $medoids [,1] [,2] [1,] -0.9691441 0.03342515 [2,] 50.1502433 10.60358224 $clusinfo size max_diss av_diss isolation [1,] 700 33.31923 10.180359 0.6382900 [2,] 800 30.54975 9.953191 0.5852356 $objective [1] 10.05697 $i.med [1] 22 574 $medoids [,1] [,2] [1,] 50.5809098 9.74183863 [2,] -0.9691441 0.03342515 $clusinfo size max_diss av_diss isolation [1,] 800 29.98892 9.949013 0.5716937 [2,] 700 33.31923 10.180359 0.6351809 $objective [1] 10.05096 $i.med [1] 739 808 $medoids [,1] [,2] [1,] 50.5809098 9.7418386 [2,] -0.2092816 -0.5916053 $clusinfo size max_diss av_diss isolation [1,] 800 29.98892 9.949013 0.5785936 [2,] 700 34.30301 10.167473 0.6618278 $objective [1] 10.06135 $i.med [1] 1431 485 $medoids [,1] [,2] [1,] 50.0627056 9.47822525 [2,] -0.9691441 0.03342515 $clusinfo size max_diss av_diss isolation [1,] 800 29.51131 9.957225 0.5686352 [2,] 700 33.31923 10.180359 0.6420076 $objective [1] 10.05324 $i.med [1] 10 1221 $medoids [,1] [,2] [1,] 50.58090982 9.741838628 [2,] -0.03900399 -0.009078886 $clusinfo size max_diss av_diss isolation [1,] 800 29.98892 9.949013 0.5817385 [2,] 700 34.06055 10.172348 0.6607218 $objective [1] 10.06101 $i.med [1] 1249 1411 $medoids [,1] [,2] [1,] -0.9691441 0.03342515 [2,] 50.0372683 9.50112485 $clusinfo size max_diss 
av_diss isolation [1,] 700 33.31923 10.180359 0.6422655 [2,] 800 29.51964 9.956571 0.5690241 $objective [1] 10.05296 $i.med [1] 610 21 $medoids [,1] [,2] [1,] -0.3292826 -0.2398794 [2,] 50.0372683 9.5011249 $clusinfo size max_diss av_diss isolation [1,] 700 33.98451 10.163128 0.6624677 [2,] 800 29.51964 9.956571 0.5754330 $objective [1] 10.06486 $i.med [1] 1101 397 $medoids [,1] [,2] [1,] -0.9691441 0.03342515 [2,] 50.1066826 9.35514422 $clusinfo size max_diss av_diss isolation [1,] 700 33.31923 10.180359 0.6417479 [2,] 800 29.42336 9.963794 0.5667111 $objective [1] 10.07521 $i.med [1] 838 356 $medoids [,1] [,2] [1,] 50.36212634 9.020718482 [2,] -0.03900399 -0.009078886 $clusinfo size max_diss av_diss isolation [1,] 800 29.25143 9.990206 0.5712766 [2,] 700 34.06055 10.172348 0.6651980 $objective [1] 10.05906 $i.med [1] 1270 1024 $medoids [,1] [,2] [1,] 50.5455985 9.3904972 [2,] -0.2092816 -0.5916053 $clusinfo size max_diss av_diss isolation [1,] 800 29.66384 9.964205 0.5734673 [2,] 700 34.30301 10.167473 0.6631526 > > clara(x, 2, samples = 101)[clInd] $objective [1] 10.05727 $i.med [1] 286 1115 $medoids [,1] [,2] [1,] -0.03900399 -0.009078886 [2,] 50.03726827 9.501124850 $clusinfo size max_diss av_diss isolation [1,] 700 34.06055 10.172348 0.6682295 [2,] 800 29.51964 9.956571 0.5791419 > clara(x, 2, samples = 149)[clInd] $objective [1] 10.05319 $i.med [1] 238 1272 $medoids [,1] [,2] [1,] -0.2092816 -0.5916053 [2,] 50.1502433 10.6035822 $clusinfo size max_diss av_diss isolation [1,] 700 34.30301 10.167473 0.6649301 [2,] 800 30.54975 9.953191 0.5921768 > clara(x, 2, samples = 200)[clInd] $objective [1] 10.05319 $i.med [1] 238 1272 $medoids [,1] [,2] [1,] -0.2092816 -0.5916053 [2,] 50.1502433 10.6035822 $clusinfo size max_diss av_diss isolation [1,] 700 34.30301 10.167473 0.6649301 [2,] 800 30.54975 9.953191 0.5921768 > ## Note that this last one is practically identical to the slower pam() one > > (ii <- sample(length(x), 20)) [1] 249 452 2663 2537 2235 2421 1004 
1834 2602 397 717 2805 1575 1281 283 [16] 1657 1749 820 269 519 > ## This was bogous (and lead to seg.faults); now properly gives error. > ## but for these, now see ./clara-NAs.R > if(FALSE) { ## ~~~~~~~~~~~~~ + x[ii] <- NA + try( clara(x, 2, samples = 50) ) + } > > ###-- Larger example: 2000 objects, divided into 5 clusters. > x5 <- rbind(cbind(rnorm(400, 0,4), rnorm(400, 0,4)), + cbind(rnorm(400,10,8), rnorm(400,40,6)), + cbind(rnorm(400,30,4), rnorm(400, 0,4)), + cbind(rnorm(400,40,4), rnorm(400,20,2)), + cbind(rnorm(400,50,4), rnorm(400,50,4))) > ## plus 1 random dimension > x5 <- cbind(x5, rnorm(nrow(x5))) > > clara(x5, 5) Call: clara(x = x5, k = 5) Medoids: [,1] [,2] [,3] [1,] 0.5850466 -2.222194 -0.63631241 [2,] 8.0131143 42.708122 -0.31693240 [3,] 42.6657812 21.123133 -0.62411426 [4,] 50.6470292 48.480686 -0.09146223 [5,] 28.6470950 -2.544131 -0.22186047 Objective function: 6.100721 Clustering vector: int [1:2000] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... Cluster sizes: 400 396 408 401 395 Best sample: [1] 23 130 178 202 267 297 338 357 376 387 439 441 638 647 662 [16] 719 723 802 874 880 994 1038 1056 1097 1184 1215 1225 1268 1271 1282 [31] 1346 1442 1446 1474 1496 1515 1585 1590 1605 1641 1680 1687 1696 1728 1742 [46] 1761 1857 1909 1951 1956 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > summary(clara(x5, 5, samples = 50)) Object of class 'clara' from call: clara(x = x5, k = 5, samples = 50) Medoids: [,1] [,2] [,3] [1,] -0.8427864 0.1606105 -0.70362181 [2,] 12.0389703 39.0303445 0.19158023 [3,] 39.6341676 20.7182868 0.43978514 [4,] 50.6470292 48.4806864 -0.09146223 [5,] 30.6814242 -0.1072177 -0.25861548 Objective function: 5.743812 Numerical information per cluster: size max_diss av_diss isolation [1,] 400 15.20728 5.207177 0.4823345 [2,] 397 24.25898 8.677062 0.7324727 [3,] 406 18.39064 4.369617 0.8109074 [4,] 401 18.28050 5.260543 0.6119680 [5,] 396 12.69653 5.243478 0.5598344 
Average silhouette width per cluster: [1] 0.7433532 0.6956424 0.7315944 0.7336104 0.7079360 Average silhouette width of best sample: 0.7188531 Best sample: [1] 106 130 145 213 275 316 434 444 486 501 630 693 713 739 773 [16] 804 808 821 823 899 914 948 961 972 980 987 1076 1114 1126 1127 [31] 1169 1175 1203 1225 1228 1242 1269 1397 1405 1421 1595 1606 1658 1703 1777 [46] 1834 1857 1881 1937 1999 Clustering vector: [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [186] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [223] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [260] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [297] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [334] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [371] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 [408] 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [445] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [482] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [519] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [556] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [593] 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 4 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [630] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [667] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [704] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 
2 2 2 2 2 2 2 [741] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [778] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [815] 5 3 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [852] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 3 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [889] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [926] 5 5 5 5 5 5 5 5 5 3 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [963] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [1000] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [1037] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [1074] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 3 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [1111] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [1148] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 [1185] 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1222] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1259] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1296] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1333] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1370] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1407] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1444] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1481] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1518] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1555] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 [1592] 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1629] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 
4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1666] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1703] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1740] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1777] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1814] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1851] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1888] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1925] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1962] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [1999] 4 4 Silhouette plot information for best sample: cluster neighbor sil_width 130 1 5 0.8123353 275 1 5 0.7945197 316 1 5 0.7561799 213 1 5 0.7459412 106 1 5 0.6869957 145 1 5 0.6641473 630 2 3 0.7819320 739 2 3 0.7774128 486 2 3 0.7559683 713 2 3 0.7316982 444 2 3 0.7204625 501 2 3 0.7091146 773 2 1 0.6886472 693 2 3 0.5855803 434 2 3 0.5099654 1225 3 5 0.8105776 1203 3 5 0.7965773 1595 3 5 0.7842711 1269 3 5 0.7799931 1242 3 5 0.7625442 1397 3 5 0.7315512 1228 3 5 0.7262025 1421 3 5 0.6011616 1405 3 5 0.5914707 1999 4 3 0.8050046 1857 4 3 0.8030709 1658 4 3 0.7941141 1777 4 3 0.7865209 1937 4 3 0.7831996 1881 4 3 0.7504779 1834 4 3 0.6614223 1606 4 3 0.6373808 1703 4 3 0.5813025 804 5 3 0.8021043 987 5 3 0.7999064 1076 5 3 0.7907769 948 5 3 0.7905304 961 5 3 0.7716289 823 5 3 0.7657693 808 5 3 0.7510670 914 5 3 0.7358231 1175 5 3 0.7337485 1169 5 3 0.7254812 972 5 3 0.7118795 821 5 3 0.7101558 899 5 1 0.6580927 1114 5 3 0.6552887 1127 5 3 0.6292428 1126 5 3 0.5362475 980 5 1 0.4671695 1225 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 
0.69682 19.31600 34.09200 33.07000 46.25400 92.25300 Metric : euclidean Number of objects : 50 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > ## 3 "half" samples: > clara(x5, 5, samples = 999) Call: clara(x = x5, k = 5, samples = 999) Medoids: [,1] [,2] [,3] [1,] 0.2143499 0.3891695 0.45577894 [2,] 10.9779485 39.6788652 -0.23487762 [3,] 40.2944064 20.2221253 0.21417849 [4,] 50.7170411 49.7645642 -0.43318939 [5,] 29.7257398 -0.5981739 -0.05616701 Objective function: 5.659041 Clustering vector: int [1:2000] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... Cluster sizes: 400 397 407 401 395 Best sample: [1] 1 2 103 147 155 176 179 247 262 288 365 369 372 470 486 [16] 573 759 779 785 791 797 822 875 883 913 954 1107 1114 1154 1156 [31] 1171 1175 1206 1213 1218 1233 1243 1394 1439 1444 1512 1741 1777 1798 1800 [46] 1818 1845 1946 1948 1973 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > clara(x5, 5, samples = 1000) Call: clara(x = x5, k = 5, samples = 1000) Medoids: [,1] [,2] [,3] [1,] 0.2143499 0.3891695 0.45577894 [2,] 10.9779485 39.6788652 -0.23487762 [3,] 40.2944064 20.2221253 0.21417849 [4,] 50.7170411 49.7645642 -0.43318939 [5,] 29.7257398 -0.5981739 -0.05616701 Objective function: 5.659041 Clustering vector: int [1:2000] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... 
Cluster sizes: 400 397 407 401 395 Best sample: [1] 1 2 103 147 155 176 179 247 262 288 365 369 372 470 486 [16] 573 759 779 785 791 797 822 875 883 913 954 1107 1114 1154 1156 [31] 1171 1175 1206 1213 1218 1233 1243 1394 1439 1444 1512 1741 1777 1798 1800 [46] 1818 1845 1946 1948 1973 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > clara(x5, 5, samples = 1001) Call: clara(x = x5, k = 5, samples = 1001) Medoids: [,1] [,2] [,3] [1,] 0.2143499 0.3891695 0.45577894 [2,] 10.9779485 39.6788652 -0.23487762 [3,] 40.2944064 20.2221253 0.21417849 [4,] 50.7170411 49.7645642 -0.43318939 [5,] 29.7257398 -0.5981739 -0.05616701 Objective function: 5.659041 Clustering vector: int [1:2000] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... Cluster sizes: 400 397 407 401 395 Best sample: [1] 1 2 103 147 155 176 179 247 262 288 365 369 372 470 486 [16] 573 759 779 785 791 797 822 875 883 913 954 1107 1114 1154 1156 [31] 1171 1175 1206 1213 1218 1233 1243 1394 1439 1444 1512 1741 1777 1798 1800 [46] 1818 1845 1946 1948 1973 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > > clara(x5, 5, samples = 2000)#full sample Call: clara(x = x5, k = 5, samples = 2000) Medoids: [,1] [,2] [,3] [1,] 0.2143499 0.3891695 0.45577894 [2,] 10.5993345 39.8970536 -0.39199265 [3,] 40.3370139 20.3148331 -0.06033818 [4,] 50.7170411 49.7645642 -0.43318939 [5,] 29.7257398 -0.5981739 -0.05616701 Objective function: 5.65785 Clustering vector: int [1:2000] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... 
Cluster sizes: 400 397 407 401 395 Best sample: [1] 84 106 164 226 284 288 329 423 430 450 469 593 603 654 742 [16] 887 929 970 974 1035 1043 1096 1171 1187 1192 1302 1307 1327 1371 1431 [31] 1433 1439 1440 1452 1513 1522 1525 1548 1565 1593 1620 1639 1654 1688 1740 [46] 1761 1832 1845 1895 1899 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > > ###--- Start a version of example(clara) ------- > > ## xclara : artificial data with 3 clusters of 1000 bivariate objects each. > data(xclara) > (clx3 <- clara(xclara, 3)) Call: clara(x = xclara, k = 3) Medoids: V1 V2 [1,] 5.553391 13.306260 [2,] 43.198760 60.360720 [3,] 74.591890 -6.969018 Objective function: 13.225 Clustering vector: int [1:3000] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... Cluster sizes: 900 1148 952 Best sample: [1] 20 30 46 91 92 169 179 187 209 223 382 450 555 971 1004 [16] 1025 1058 1277 1281 1302 1319 1361 1362 1513 1591 1623 1628 1729 1752 1791 [31] 1907 1917 1946 2064 2089 2498 2527 2537 2545 2591 2672 2722 2729 2790 2797 [46] 2852 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > ## Plot similar to Figure 5 in Struyf et al (1996) > plot(clx3) > > ## The rngR = TRUE case is currently in the non-strict tests > ## ./clara-ex.R > ## ~~~~~~~~~~~~ > > ###--- End version of example(clara) ------- > > ## small example(s): > data(ruspini) > > clara(ruspini,4) Call: clara(x = ruspini, k = 4) Medoids: x y 10 19 65 32 44 149 52 99 119 67 66 18 Objective function: 11.51066 Clustering vector: Named int [1:75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ... - attr(*, "names")= chr [1:75] "1" "2" "3" "4" "5" "6" "7" ... 
Cluster sizes: 20 23 17 15 Best sample: [1] 2 3 4 5 6 7 8 9 10 16 18 19 20 21 22 23 25 29 30 32 34 35 36 37 41 [26] 42 43 44 46 47 49 50 52 53 54 58 59 60 61 63 65 66 67 69 71 72 73 75 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > > rus <- data.matrix(ruspini); storage.mode(rus) <- "double" > ru2 <- rus[c(1:7,21:28, 45:51, 61:69),] > ru3 <- rus[c(1:4,21:25, 45:48, 61:63),] > ru4 <- rus[c(1:2,21:22, 45:47),] > ru5 <- rus[c(1:2,21, 45),] > daisy(ru5, "manhattan") Dissimilarities : 1 2 21 2 11 21 118 107 45 143 132 89 Metric : manhattan Number of objects : 4 > ## Dissimilarities : 11 118 143 107 132 89 > > ## no problem anymore, since 2002-12-28: > ## sampsize >= k+1 is now enforced: > ## clara(ru5, k=3, met="manhattan", sampsize=3,trace=2)[clInS] > clara(ru5, k=3, met="manhattan", sampsize=4,trace=1)[clInS] C clara(): (nsam,nran,n) = (4,5,4); 'full_sample', C clara(): sample 1 C clara(): best sample _found_ --> dysta2(nbest), resul(), end $objective [1] 2.75 $i.med [1] 2 3 4 $medoids x y 2 5 63 21 28 147 45 85 115 $clusinfo size max_diss av_diss isolation [1,] 2 11 5.5 0.1028037 [2,] 1 0 0.0 0.0000000 [3,] 1 0 0.0 0.0000000 $sample [1] "1" "2" "21" "45" > > daisy(ru4, "manhattan") Dissimilarities : 1 2 21 22 45 46 2 11 21 118 107 22 124 113 6 45 143 132 89 87 46 124 113 108 106 19 47 115 104 103 101 28 9 Metric : manhattan Number of objects : 7 > ## this one (k=3) gave problems, from ss = 6 on ___ still after 2002-12-28 ___ : > for(ss in 4:nrow(ru4)){ + cat("---\n\nsample size = ",ss,"\n") + print(clara(ru4,k=3,met="manhattan",sampsize=ss)[clInS]) + } --- sample size = 4 $objective [1] 7.714286 $i.med [1] 1 4 7 $medoids x y 1 4 53 22 32 149 47 78 94 $clusinfo size max_diss av_diss isolation [1,] 2 11 5.50000 0.09565217 [2,] 2 6 3.00000 0.05940594 [3,] 3 28 12.33333 0.27722772 $sample [1] "1" "22" "45" "47" --- sample size = 5 $objective [1] 7.714286 $i.med [1] 2 3 7 $medoids x y 2 5 63 21 
28 147 47 78 94 $clusinfo size max_diss av_diss isolation [1,] 2 11 5.50000 0.10576923 [2,] 2 6 3.00000 0.05825243 [3,] 3 28 12.33333 0.27184466 $sample [1] "2" "21" "22" "45" "47" --- sample size = 6 $objective [1] 6.428571 $i.med [1] 2 4 6 $medoids x y 2 5 63 22 32 149 46 85 96 $clusinfo size max_diss av_diss isolation [1,] 2 11 5.500000 0.09734513 [2,] 2 6 3.000000 0.05660377 [3,] 3 19 9.333333 0.17924528 $sample [1] "2" "21" "22" "45" "46" "47" --- sample size = 7 $objective [1] 6.428571 $i.med [1] 2 4 6 $medoids x y 2 5 63 22 32 149 46 85 96 $clusinfo size max_diss av_diss isolation [1,] 2 11 5.500000 0.09734513 [2,] 2 6 3.000000 0.05660377 [3,] 3 19 9.333333 0.17924528 $sample [1] "1" "2" "21" "22" "45" "46" "47" > for(ss in 5:nrow(ru3)){ + cat("---\n\nsample size = ",ss,"\n") + print(clara(ru3,k=4,met="manhattan",sampsize=ss)[clInS]) + } --- sample size = 5 $objective [1] 13.625 $i.med [1] 4 5 10 15 $medoids x y 4 9 77 21 28 147 45 85 115 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 29 16.50 0.3258427 [2,] 5 14 9.00 0.1573034 [3,] 4 30 19.25 0.3370787 [4,] 3 15 10.00 0.1351351 $sample [1] "3" "4" "21" "45" "62" --- sample size = 6 $objective [1] 9.0625 $i.med [1] 3 7 13 15 $medoids x y 3 10 59 23 35 153 48 74 96 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 19 10.00 0.1881188 [2,] 5 13 5.60 0.1354167 [3,] 4 30 11.75 0.3448276 [4,] 3 15 10.00 0.1724138 $sample [1] "3" "21" "23" "45" "48" "62" --- sample size = 7 $objective [1] 9.0625 $i.med [1] 3 7 13 15 $medoids x y 3 10 59 23 35 153 48 74 96 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 19 10.00 0.1881188 [2,] 5 13 5.60 0.1354167 [3,] 4 30 11.75 0.3448276 [4,] 3 15 10.00 0.1724138 $sample [1] "2" "3" "21" "23" "45" "48" "62" --- sample size = 8 $objective [1] 8.8125 $i.med [1] 3 7 12 15 $medoids x y 3 10 59 23 35 153 47 78 94 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 19 10.00 0.1844660 [2,] 5 13 5.60 0.1274510 [3,] 4 28 10.75 0.3373494 [4,] 3 15 10.00 
0.1807229 $sample [1] "3" "21" "23" "46" "47" "48" "61" "62" --- sample size = 9 $objective [1] 9.3125 $i.med [1] 2 6 11 16 $medoids x y 2 5 63 22 32 149 46 85 96 63 83 21 $clusinfo size max_diss av_diss isolation [1,] 4 18 9.50 0.1592920 [2,] 5 8 5.40 0.0754717 [3,] 4 19 9.75 0.2467532 [4,] 3 30 15.00 0.3896104 $sample [1] "2" "21" "22" "23" "45" "46" "47" "61" "63" --- sample size = 10 $objective [1] 8.5625 $i.med [1] 3 7 11 15 $medoids x y 3 10 59 23 35 153 46 85 96 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 19 10.00 0.1696429 [2,] 5 13 5.60 0.1214953 [3,] 4 19 9.75 0.2065217 [4,] 3 15 10.00 0.1630435 $sample [1] "2" "3" "22" "23" "45" "46" "47" "61" "62" "63" --- sample size = 11 $objective [1] 8.6875 $i.med [1] 2 7 12 15 $medoids x y 2 5 63 23 35 153 47 78 94 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 18 9.50 0.1730769 [2,] 5 13 5.60 0.1274510 [3,] 4 28 10.75 0.3373494 [4,] 3 15 10.00 0.1807229 $sample [1] "1" "2" "3" "4" "23" "24" "25" "45" "47" "48" "62" --- sample size = 12 $objective [1] 8.8125 $i.med [1] 3 7 12 15 $medoids x y 3 10 59 23 35 153 47 78 94 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 19 10.00 0.1844660 [2,] 5 13 5.60 0.1274510 [3,] 4 28 10.75 0.3373494 [4,] 3 15 10.00 0.1807229 $sample [1] "2" "3" "22" "23" "24" "25" "46" "47" "48" "61" "62" "63" --- sample size = 13 $objective [1] 8.4375 $i.med [1] 2 7 11 15 $medoids x y 2 5 63 23 35 153 46 85 96 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 18 9.50 0.1592920 [2,] 5 13 5.60 0.1214953 [3,] 4 19 9.75 0.2065217 [4,] 3 15 10.00 0.1630435 $sample [1] "1" "2" "4" "22" "23" "24" "25" "45" "46" "47" "61" "62" "63" --- sample size = 14 $objective [1] 8.4375 $i.med [1] 2 7 11 15 $medoids x y 2 5 63 23 35 153 46 85 96 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 18 9.50 0.1592920 [2,] 5 13 5.60 0.1214953 [3,] 4 19 9.75 0.2065217 [4,] 3 15 10.00 0.1630435 $sample [1] "2" "3" "4" "22" "23" "24" "25" "45" "46" "47" "48" "61" "62" "63" 
--- sample size = 15 $objective [1] 8.375 $i.med [1] 2 6 11 15 $medoids x y 2 5 63 22 32 149 46 85 96 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 18 9.50 0.1592920 [2,] 5 8 5.40 0.0754717 [3,] 4 19 9.75 0.2065217 [4,] 3 15 10.00 0.1630435 $sample [1] "2" "3" "4" "21" "22" "23" "24" "25" "45" "46" "47" "48" "61" "62" "63" --- sample size = 16 $objective [1] 8.375 $i.med [1] 2 6 11 15 $medoids x y 2 5 63 22 32 149 46 85 96 62 77 12 $clusinfo size max_diss av_diss isolation [1,] 4 18 9.50 0.1592920 [2,] 5 8 5.40 0.0754717 [3,] 4 19 9.75 0.2065217 [4,] 3 15 10.00 0.1630435 $sample [1] "1" "2" "3" "4" "21" "22" "23" "24" "25" "45" "46" "47" "48" "61" "62" [16] "63" > > ## Last Line: > cat('Time elapsed: ', proc.time() - .proctime00,'\n') Time elapsed: 2.121 0.013 2.146 0 0 > ## Lynne (P IV, 1.6 GHz): 18.81; then (no NA; R 1.9.0-alpha): 15.07 > ## nb-mm (P III,700 MHz): 27.97 > > proc.time() user system elapsed 2.248 0.037 2.343 cluster/tests/diana-boots.R0000644000175100001440000000161011674325037015444 0ustar hornikuserslibrary(cluster) ## Kind of a bootstrap -- calling many diana()s dianaBoot <- function(data, B = 500, frac.sub = c(0.7, min = 0.2), digits = 4) { stopifnot((n <- nrow(data)) >= 10, B >= 10, frac.sub > 0, (m <- round(frac.sub[["min"]]*n)) >= 2, (mm <- round(frac.sub[1]*n)) > m) for(b in 1:B) { d.r <- data[sample(n, max(m, min(n, rpois(1, lambda = mm)))) ,] dia. 
<- diana(d.r, keep.diss=FALSE, keep.data=FALSE) print(dia.[1:3], digits = digits) } } .p0 <- proc.time() data(ruspini) set.seed(134) dianaBoot(ruspini) cat('Time elapsed: ', (.p1 <- proc.time()) - .p0,'\n') data(agriculture) set.seed(707) dianaBoot(agriculture) cat('Time elapsed: ', (.p2 <- proc.time()) - .p1,'\n') data(swiss); swiss.x <- as.matrix(swiss[,-1]) set.seed(312) dianaBoot(swiss.x) cat('Time elapsed: ', (.p3 <- proc.time()) - .p2,'\n') cluster/tests/silhouette-default.R0000644000175100001440000000510011626747420017052 0ustar hornikusers## This came from a bug report on R-help by ge yreyt ## Date: Mon, 9 Jun 2003 16:06:53 -0400 (EDT) library(cluster) if(FALSE) # manual testing library(cluster, lib="~/R/Pkgs/cluster.Rcheck") data(iris) .proctime00 <- proc.time() mdist <- as.dist(1 - cor(t(iris[,1:4])))#dissimlarity ## this is always the same: hc <- diana(mdist, diss = TRUE, stand = FALSE) maxk <- 15 # at most 15 clusters silh.wid <- numeric(maxk) # myind[k] := the silh.value for k clusters silh.wid[1] <- NA # 1-cluster: silhouette not defined op <- par(mfrow = c(4,4), mar = .1+ c(2,1,2,1), mgp=c(1.5, .6,0)) for(k in 2:maxk) { cat("\n", k,":\n==\n") k.gr <- cutree(as.hclust(hc), k = k) cat("grouping table: "); print(table(k.gr)) si <- silhouette(k.gr, mdist) cat("silhouette:\n"); print(summary(si)) plot(si, main = paste("k =",k), col = 2:(k+1), do.n.k=FALSE, do.clus.stat=FALSE) silh.wid[k] <- summary(si)$avg.width ## === } par(op) summary(si.p <- silhouette(50 - k.gr, mdist)) stopifnot(identical(si.p[,3], si[,3]), identical(si.p[, 1:2], 50 - si[, 1:2])) # the widths: silh.wid #select the number of k clusters with the largest si value : (myk <- which.min(silh.wid)) # -> 8 (here) postscript(file="silhouette-ex.ps") ## MM: plot to see how the decision is made plot(silh.wid, type = 'b', col= "blue", xlab = "k") axis(1, at=myk, col.axis= "red", font.axis= 2) ##--- PAM()'s silhouette should give same as silh*.default()! 
Eq <- function(x,y, tol = 1e-12) x == y | abs(x - y) < tol * abs((x+y)/2) for(k in 2:40) { cat("\n", k,":\n==\n") p.k <- pam(mdist, k = k) k.gr <- p.k$clustering si.p <- silhouette(p.k) si.g <- silhouette(k.gr, mdist) ## since the obs.order may differ (within cluster): si.g <- si.g[ as.integer(rownames(si.p)), ] cat("grouping table: "); print(table(k.gr)) if(!isTRUE(all.equal(c(si.g), c(si.p)))) { cat("silhouettes differ:") if(any(neq <- !Eq(si.g[,3], si.p[,3]))) { cat("\n") print( cbind(si.p[], si.g[,2:3])[ neq, ] ) } else cat(" -- but not in col.3 !\n") } } ## "pathological" case where a_i == b_i == 0 : D6 <- structure(c(0, 0, 0, 0.4, 1, 0.05, 1, 1, 0, 1, 1, 0, 0.25, 1, 1), Labels = LETTERS[1:6], Size = 6, call = as.name("manually"), class = "dist", Diag = FALSE, Upper = FALSE) D6 kl6 <- c(1,1, 2,2, 3,3) silhouette(kl6, D6)# had one NaN summary(silhouette(kl6, D6)) plot(silhouette(kl6, D6))# gives error in earlier cluster versions dev.off() ## Last Line: cat('Time elapsed: ', proc.time() - .proctime00,'\n') cluster/tests/agnes-ex.R0000644000175100001440000000635012422156313014750 0ustar hornikuserslibrary(cluster) options(digits = 6) data(votes.repub) ## From Matrix' test-tools-1.R : showProc.time <- local({ ## function + 'pct' variable pct <- proc.time() function(final="\n") { ## CPU elapsed __since last called__ ot <- pct ; pct <<- proc.time() ## 'Time ..' *not* to be translated: tools::Rdiff() skips its lines! cat('Time elapsed: ', (pct - ot)[1:3], final) } }) agn1 <- agnes(votes.repub, metric = "manhattan", stand = TRUE) summary(agn1) Dvr <- daisy(votes.repub) agn2 <- agnes(Dvr, method = "complete") summary(agn2) ## almost same: (ag2. <- agnes(Dvr, method= "complete", keep.diss=FALSE)) ag22 <- agnes(votes.repub, method= "complete", keep.diss=FALSE,keep.data=FALSE) stopifnot(identical(agn2[-5:-6], ag2.[-5:-6]), identical(Dvr, daisy(votes.repub)), # DUP=FALSE (!) 
identical(ag2.[-6], ag22[-6]) ) data(agriculture) summary(agnes(agriculture)) data(ruspini) summary(ar0 <- agnes(ruspini, keep.diss=FALSE, keep.data=FALSE)) summary(ar1 <- agnes(ruspini, metric = "manhattan")) str(ar1) showProc.time() summary(ar2 <- agnes(ruspini, metric="manhattan", method = "weighted")) print (ar3 <- agnes(ruspini, metric="manhattan", method = "flexible", par.meth = 0.5)) stopifnot(all.equal(ar2[1:4], ar3[1:4], tol=1e-12)) showProc.time() ## Small example, testing "flexible" vs "single" i8 <- -c(1:2, 9:10) dim(agr8 <- agriculture[i8, ]) i5 <- -c(1:2, 8:12) dim(agr5 <- agriculture[i5, ]) chk <- function(d, method=c("single", "complete", "weighted"), trace.lev = 1, iC = -(6:7), # <- not using 'call' and 'method' for comparisons doplot = FALSE, tol = 1e-12) { if(!inherits(d, "dist")) d <- daisy(d, "manhattan") method <- match.arg(method) par.meth <- list("single" = c(.5, .5, 0, -.5), "complete"= c(.5, .5, 0, +.5), "weighted"= c(0.5)) a.s <- agnes(d, method=method, trace.lev=trace.lev) ## From theory, this should give the same, but it does not --- why ??? 
a.f <- agnes(d, method="flex", par.method = par.meth[[method]], trace.lev=trace.lev) if(doplot) { op <- par(mfrow = c(2,2), mgp = c(1.6, 0.6, 0), mar = .1 + c(4,4,2,1)) on.exit(par(op)) plot(a.s) plot(a.f) } structure(all.equal(a.s[iC], a.f[iC], tolerance = tol), fits = list(s = a.s, f = a.f)) } chk(agr5, trace = 3) stopifnot(chk(agr5), chk(agr5, "complete", trace = 2), chk(agr5, "weighted"), chk(agr8), chk(agr8, "complete"), chk(agr8, "weighted", trace.lev=2), chk(agriculture), chk(agriculture, "complete"), chk(ruspini), chk(ruspini, "complete"), chk(ruspini, "weighted")) showProc.time() ## an invalid "flexible" case - now must give error early: x <- rbind(c( -6, -9), c( 0, 13), c(-15, 6), c(-14, 0), c(12,-10)) (dx <- daisy(x, "manhattan")) a.x <- tryCatch(agnes(dx, method="flexible", par = -.2), error = function(e)e) ## agnes(method=6, par.method=*) lead to invalid merge; step 4, D(.,.)=-26.1216 if(!inherits(a.x, "error")) stop("invalid 'par' in \"flexible\" did not give error") if(!all(vapply(c("par[.]method", "merge"), grepl, NA, x=a.x$message))) stop("error message did not contain expected words") cluster/tests/ellipsoid-ex.R0000644000175100001440000000153412540507660015644 0ustar hornikuserslibrary(cluster) eh <- ellipsoidhull(cbind(x=1:4, y = 1:4)) #singular eh ## center ok, shape "0 volume" --> Warning set.seed(157) for(n in 4:10) { ## n=2 and 3 still differ -- platform dependently! cat("n = ",n,"\n") x2 <- rnorm(n) print(ellipsoidhull(cbind(1:n, x2))) print(ellipsoidhull(cbind(1:n, x2, 4*x2 + rnorm(n)))) } set.seed(1) x <- rt(100, df = 4) y <- 100 + 5 * x + rnorm(100) ellipsoidhull(cbind(x,y)) z <- 10 - 8 * x + y + rnorm(100) (e3 <- ellipsoidhull(cbind(x,y,z))) d3o <- cbind(x,y + rt(100,3), 2 * x^2 + rt(100, 2)) (e. 
<- ellipsoidhull(d3o, ret.sq = TRUE)) stopifnot(all.equal(e.$sqdist, with(e., mahalanobis(d3o, center=loc, cov=cov)), tol = 1e-13)) d5 <- cbind(d3o, 2*abs(y)^1.5 + rt(100,3), 3*x - sqrt(abs(y))) (e5 <- ellipsoidhull(d5, ret.sq = TRUE)) tail(sort(e5$sqdist)) ## 4 values 5.00039 ... 5.0099 cluster/tests/diana-ex.Rout.save0000644000175100001440000004203611674325037016426 0ustar hornikusers R version 2.14.1 RC (2011-12-20 r57943) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. 
> library(cluster) > options(digits = 6) > data(votes.repub) > di.votes <- daisy(votes.repub) > > .p00 <- proc.time() > summary(diana(votes.repub, metric = "manhattan", stand = TRUE)) Merge: [,1] [,2] [1,] -7 -32 [2,] -13 -35 [3,] -12 -50 [4,] 1 -30 [5,] -26 -28 [6,] -5 -37 [7,] -22 -38 [8,] -21 -39 [9,] -16 -27 [10,] 4 2 [11,] -25 -48 [12,] -42 -46 [13,] -6 -14 [14,] -34 -41 [15,] -8 -20 [16,] 5 -31 [17,] 10 7 [18,] -17 -47 [19,] -3 -44 [20,] -33 12 [21,] 15 18 [22,] 17 -29 [23,] 22 -49 [24,] 21 11 [25,] 23 -15 [26,] -11 -19 [27,] 3 9 [28,] 8 -23 [29,] 19 16 [30,] 27 14 [31,] 6 25 [32,] -1 -10 [33,] 31 13 [34,] 29 -36 [35,] -2 -45 [36,] -9 -43 [37,] 24 20 [38,] 32 -4 [39,] -24 -40 [40,] 38 -18 [41,] 33 30 [42,] 34 37 [43,] 35 26 [44,] 41 28 [45,] 40 36 [46,] 42 44 [47,] 45 39 [48,] 43 46 [49,] 47 48 Order of objects: [1] Alabama Georgia Arkansas Louisiana Florida [6] Texas Mississippi South Carolina Alaska Vermont [11] Hawaii Maine Arizona Utah Montana [16] Nevada New Mexico Oklahoma Delaware Maryland [21] Kentucky Washington Missouri West Virginia North Carolina [26] Tennessee Virginia California Oregon Connecticut [31] New York New Jersey Illinois Ohio Michigan [36] Pennsylvania New Hampshire Wisconsin Iowa Colorado [41] Indiana Idaho Wyoming Kansas Nebraska [46] North Dakota South Dakota Massachusetts Rhode Island Minnesota Height: [1] 27.36345 33.96925 39.65826 48.53428 31.89965 72.59850 35.69152 [8] 167.58020 31.58222 43.84601 24.48796 85.55248 18.39339 25.67631 [15] 11.49397 17.45552 28.62550 42.54480 16.48510 20.04450 17.87516 [22] 21.98373 14.21808 33.61071 18.39733 14.75762 56.55675 11.70132 [29] 27.05887 8.38200 11.36820 13.25237 9.23004 17.83484 12.70819 [36] 20.66714 21.03997 23.66586 28.60541 15.31703 40.33905 10.46294 [43] 24.83525 12.80419 26.36292 16.25192 47.25773 12.79160 24.87206 Divisive coefficient: [1] 0.886918 1225 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 
8.382 25.540 34.510 45.060 56.020 167.600 Metric : manhattan Number of objects : 50 Available components: [1] "order" "height" "dc" "merge" "diss" "call" [7] "order.lab" "data" > summary(diana(di.votes, keep.diss = FALSE)) Merge: [,1] [,2] [1,] -12 -50 [2,] -13 -32 [3,] -14 -35 [4,] -7 -29 [5,] -21 -39 [6,] -3 -28 [7,] -25 -48 [8,] -16 -27 [9,] -15 -41 [10,] 2 -30 [11,] 6 -26 [12,] -33 -42 [13,] 12 -46 [14,] 1 -44 [15,] 10 3 [16,] -22 -38 [17,] -11 -19 [18,] -5 -47 [19,] -17 -20 [20,] -2 -45 [21,] 14 -31 [22,] -37 -49 [23,] 9 8 [24,] 4 15 [25,] -8 19 [26,] -6 21 [27,] 5 -23 [28,] 24 16 [29,] 26 -36 [30,] 11 29 [31,] -1 -10 [32,] 28 22 [33,] 23 -34 [34,] 7 13 [35,] -4 -9 [36,] 20 17 [37,] -24 -40 [38,] 31 -43 [39,] 32 33 [40,] 25 34 [41,] 30 18 [42,] 35 -18 [43,] 38 42 [44,] 39 27 [45,] 41 40 [46,] 36 44 [47,] 43 37 [48,] 46 45 [49,] 47 48 Order of objects: [1] Alabama Georgia Texas Arkansas Florida [6] Louisiana Mississippi South Carolina Alaska Vermont [11] Hawaii Maine Connecticut New Hampshire Illinois [16] New York New Jersey Indiana Ohio Michigan [21] Pennsylvania Oregon Wisconsin Iowa South Dakota [26] Kansas Nebraska North Dakota Massachusetts Rhode Island [31] Minnesota Arizona Nevada Montana Colorado [36] Idaho Wyoming Utah New Mexico Oklahoma [41] California Washington Delaware Kentucky Maryland [46] Missouri West Virginia North Carolina Tennessee Virginia Height: [1] 48.2397 63.1862 72.9221 56.1363 72.9221 116.7048 63.0951 281.9508 [9] 33.8330 58.0384 32.7611 106.7448 20.5216 39.1728 19.8436 27.0243 [17] 31.4966 20.2258 47.1690 31.6595 49.2428 36.7667 64.4821 26.1547 [25] 37.4564 25.9221 50.7201 77.1184 22.6334 44.4594 178.4119 23.4206 [33] 27.8273 48.0483 43.7055 17.1992 31.1988 34.0510 48.0483 70.4868 [41] 33.2328 81.0764 43.3829 33.4744 66.7591 25.3953 54.7306 29.5099 [49] 30.1541 Divisive coefficient: [1] 0.878225 Available components: [1] "order" "height" "dc" "merge" "diss" "call" [7] "order.lab" > cat('Time elapsed: ', proc.time() - .p00,'\n') 
Time elapsed: 0.014 0.001 0.015 0 0 > > data(agriculture) > data(ruspini) > > .p0 <- proc.time() > dia.agr <- diana(agriculture) > drusp0 <- diana(ruspini, keep.diss=FALSE, keep.data=FALSE) > drusp1 <- diana(ruspini, metric = "manhattan") > cat('Time elapsed: ', proc.time() - .p0,'\n') Time elapsed: 0.006 0.001 0.006 0 0 > > summary(dia.agr) Merge: [,1] [,2] [1,] -1 -10 [2,] -2 -9 [3,] 1 -3 [4,] -6 -8 [5,] -5 -7 [6,] 3 -12 [7,] -4 -11 [8,] 6 4 [9,] 8 2 [10,] 7 5 [11,] 9 10 Order of objects: [1] B NL D UK F I DK L GR P E IRL Height: [1] 1.64924 2.43516 4.85077 6.72309 2.77308 8.05295 2.22036 24.03539 [9] 5.16236 12.56742 3.14006 Divisive coefficient: [1] 0.871106 66 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 1.649 4.357 7.987 9.594 13.250 24.040 Metric : euclidean Number of objects : 12 Available components: [1] "order" "height" "dc" "merge" "diss" "call" [7] "order.lab" "data" > summary(drusp0) Merge: [,1] [,2] [1,] -18 -19 [2,] -55 -56 [3,] -27 -28 [4,] -49 -51 [5,] -33 -34 [6,] -23 -24 [7,] -59 -60 [8,] -29 -30 [9,] -67 -69 [10,] -36 -39 [11,] -32 -35 [12,] -50 -54 [13,] -37 -38 [14,] -70 -71 [15,] -64 -68 [16,] -62 -66 [17,] -12 -13 [18,] -16 1 [19,] -9 -10 [20,] -42 -43 [21,] -15 -17 [22,] -47 -48 [23,] 12 -52 [24,] -21 -22 [25,] 9 14 [26,] 2 -57 [27,] -73 -74 [28,] 6 -25 [29,] -26 11 [30,] 4 -53 [31,] 3 8 [32,] -11 17 [33,] -6 -8 [34,] -2 -3 [35,] 10 -40 [36,] -14 21 [37,] -65 25 [38,] -4 33 [39,] 36 18 [40,] 29 5 [41,] 26 7 [42,] -1 34 [43,] 24 28 [44,] 16 -63 [45,] -46 22 [46,] -31 35 [47,] 27 -75 [48,] 37 -72 [49,] 43 31 [50,] 30 23 [51,] 13 20 [52,] 15 48 [53,] 32 -20 [54,] 41 -58 [55,] 42 -5 [56,] 40 46 [57,] -44 -45 [58,] 19 39 [59,] 38 -7 [60,] 51 -41 [61,] -61 44 [62,] 50 54 [63,] 52 47 [64,] 61 63 [65,] 49 56 [66,] 59 53 [67,] 55 58 [68,] 57 62 [69,] 65 60 [70,] 67 66 [71,] 68 45 [72,] 69 71 [73,] 70 64 [74,] 73 72 Order of objects: [1] 1 2 3 5 9 10 14 15 17 16 18 19 4 6 8 7 11 12 13 20 61 62 66 63 64 [26] 68 65 67 69 70 71 72 
73 74 75 21 22 23 24 25 27 28 29 30 26 32 35 33 34 31 [51] 36 39 40 37 38 42 43 41 44 45 49 51 53 50 54 52 55 56 57 59 60 58 46 47 48 Height: [1] 10.04988 6.40312 16.12452 29.12044 4.12311 17.02939 8.48528 [8] 4.24264 9.21954 4.12311 1.41421 40.24922 8.94427 6.32456 [15] 19.02630 28.84441 6.32456 4.12311 14.14214 102.07840 21.40093 [22] 4.12311 10.81665 27.07397 3.60555 13.60147 8.54400 2.82843 [29] 5.09902 3.60555 12.80625 22.80351 5.65685 12.36932 154.49595 [36] 4.47214 10.77033 2.23607 5.83095 13.03840 2.00000 6.32456 [43] 2.82843 28.16026 6.08276 3.00000 9.43398 2.23607 16.27882 [50] 11.04536 3.00000 6.70820 36.61967 3.60555 13.34166 4.24264 [57] 20.02498 94.57801 17.02939 35.35534 2.23607 6.32456 13.15295 [64] 3.16228 4.47214 22.20360 2.00000 5.38516 9.84886 2.82843 [71] 15.29706 47.63402 11.00000 4.47214 Divisive coefficient: [1] 0.960566 Available components: [1] "order" "height" "dc" "merge" "diss" "call" [7] "order.lab" > summary(drusp1) Merge: [,1] [,2] [1,] -55 -56 [2,] -27 -28 [3,] -18 -19 [4,] -49 -51 [5,] -33 -34 [6,] -32 -35 [7,] -23 -24 [8,] -59 -60 [9,] -50 -54 [10,] -29 -30 [11,] -67 -69 [12,] -37 -38 [13,] 11 -71 [14,] -64 -68 [15,] -62 -66 [16,] -12 -13 [17,] -16 3 [18,] -9 -10 [19,] -47 -48 [20,] 9 -52 [21,] -42 -43 [22,] -39 -40 [23,] -21 -22 [24,] 13 -70 [25,] -15 -17 [26,] 1 -57 [27,] -26 6 [28,] 4 -53 [29,] 7 -25 [30,] 2 10 [31,] -11 16 [32,] -6 -8 [33,] -31 -36 [34,] -74 -75 [35,] -2 -3 [36,] -46 19 [37,] -65 24 [38,] -14 17 [39,] -4 32 [40,] 38 25 [41,] -1 35 [42,] 26 8 [43,] 27 5 [44,] 23 30 [45,] 28 20 [46,] 15 -63 [47,] 43 33 [48,] 44 29 [49,] 46 -73 [50,] 31 -20 [51,] 42 -58 [52,] -44 -45 [53,] 12 22 [54,] 37 -72 [55,] 14 54 [56,] 39 -7 [57,] 41 -5 [58,] 53 21 [59,] 18 40 [60,] 48 47 [61,] -61 49 [62,] 45 51 [63,] 58 -41 [64,] 55 34 [65,] 61 64 [66,] 57 59 [67,] 56 50 [68,] 52 62 [69,] 60 63 [70,] 66 67 [71,] 68 36 [72,] 69 71 [73,] 70 65 [74,] 73 72 Order of objects: [1] 1 2 3 5 9 10 14 16 18 19 15 17 4 6 8 7 11 12 13 20 61 62 66 63 
73 [26] 64 68 65 67 69 71 70 72 74 75 21 22 27 28 29 30 23 24 25 26 32 35 33 34 31 [51] 36 37 38 39 40 42 43 41 44 45 49 51 53 50 54 52 55 56 57 59 60 58 46 47 48 Height: [1] 12 9 22 36 5 24 11 5 2 12 6 54 12 8 20 40 8 5 16 [20] 142 30 5 15 16 33 5 19 11 4 5 6 18 32 9 187 6 14 2 [39] 8 4 16 3 8 26 7 3 13 3 16 9 51 5 18 6 24 6 32 [58] 123 18 48 3 8 15 4 6 31 2 7 13 4 18 67 11 6 Divisive coefficient: [1] 0.958075 2775 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 2.00 52.50 97.00 90.95 128.00 187.00 Metric : manhattan Number of objects : 75 Available components: [1] "order" "height" "dc" "merge" "diss" "call" [7] "order.lab" "data" > str (drusp1) List of 8 $ order : int [1:75] 1 2 3 5 9 10 14 16 18 19 ... $ height : num [1:74] 12 9 22 36 5 24 11 5 2 12 ... $ dc : num 0.958 $ merge : int [1:74, 1:2] -55 -27 -18 -49 -33 -32 -23 -59 -50 -29 ... $ diss :Classes 'dissimilarity', 'dist' atomic [1:2775] 11 12 29 13 25 43 33 22 27 39 ... .. ..- attr(*, "Size")= int 75 .. ..- attr(*, "Metric")= chr "manhattan" .. ..- attr(*, "Labels")= chr [1:75] "1" "2" "3" "4" ... $ call : language diana(x = ruspini, metric = "manhattan") $ order.lab: chr [1:75] "1" "2" "3" "5" ... $ data : int [1:75, 1:2] 4 5 10 9 13 13 12 15 18 19 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:75] "1" "2" "3" "4" ... .. 
..$ : chr [1:2] "x" "y" - attr(*, "class")= chr [1:2] "diana" "twins" > > ## From system.file("scripts/ch11.R", package = "MASS") > data(swiss) > swiss.x <- as.matrix(swiss[,-1]) > .p1 <- proc.time() > dCH <- diana(swiss.x) > cat('Time elapsed: ', proc.time() - .p1,'\n') Time elapsed: 0.001 0 0.001 0 0 > str(as.dendrogram(as.hclust(dCH)))# keep back-compatible --[dendrogram w/ 2 branches and 47 members at h = 127] |--[dendrogram w/ 2 branches and 31 members at h = 99.2] | |--[dendrogram w/ 2 branches and 11 members at h = 60.9] | | |--[dendrogram w/ 2 branches and 8 members at h = 29.1] | | | |--[dendrogram w/ 2 branches and 5 members at h = 22.7] | | | | |--[dendrogram w/ 2 branches and 4 members at h = 17.4] | | | | | |--[dendrogram w/ 2 branches and 3 members at h = 11.7] | | | | | | |--leaf "Courtelary" | | | | | | `--[dendrogram w/ 2 branches and 2 members at h = 7.48] | | | | | | |--leaf "Le Locle" | | | | | | `--leaf "ValdeTravers" | | | | | `--leaf "La Chauxdfnd" | | | | `--leaf "La Vallee" | | | `--[dendrogram w/ 2 branches and 3 members at h = 19] | | | |--[dendrogram w/ 2 branches and 2 members at h = 11.5] | | | | |--leaf "Lausanne" | | | | `--leaf "Neuchatel" | | | `--leaf "Vevey" | | `--[dendrogram w/ 2 branches and 3 members at h = 56.1] | | |--leaf "V. 
De Geneve" | | `--[dendrogram w/ 2 branches and 2 members at h = 21.4] | | |--leaf "Rive Droite" | | `--leaf "Rive Gauche" | `--[dendrogram w/ 2 branches and 20 members at h = 48.4] | |--leaf "Moutier" | `--[dendrogram w/ 2 branches and 19 members at h = 44.3] | |--[dendrogram w/ 2 branches and 18 members at h = 39.1] | | |--[dendrogram w/ 2 branches and 6 members at h = 21.9] | | | |--[dendrogram w/ 2 branches and 4 members at h = 12.1] | | | | |--[dendrogram w/ 2 branches and 2 members at h = 10.8] | | | | | |--leaf "Neuveville" | | | | | `--leaf "Boudry" | | | | `--[dendrogram w/ 2 branches and 2 members at h = 4.56] | | | | |--leaf "Grandson" | | | | `--leaf "Val de Ruz" | | | `--[dendrogram w/ 2 branches and 2 members at h = 13.5] | | | |--leaf "Nyone" | | | `--leaf "Yverdon" | | `--[dendrogram w/ 2 branches and 12 members at h = 20.4] | | |--[dendrogram w/ 2 branches and 7 members at h = 15.1] | | | |--[dendrogram w/ 2 branches and 5 members at h = 11.6] | | | | |--[dendrogram w/ 2 branches and 4 members at h = 8.05] | | | | | |--[dendrogram w/ 2 branches and 3 members at h = 6.79] | | | | | | |--[dendrogram w/ 2 branches and 2 members at h = 4.79] | | | | | | | |--leaf "Aigle" | | | | | | | `--leaf "Morges" | | | | | | `--leaf "Rolle" | | | | | `--leaf "Avenches" | | | | `--leaf "Orbe" | | | `--[dendrogram w/ 2 branches and 2 members at h = 6.04] | | | |--leaf "Moudon" | | | `--leaf "Payerne" | | `--[dendrogram w/ 2 branches and 5 members at h = 17.3] | | |--[dendrogram w/ 2 branches and 4 members at h = 11.2] | | | |--[dendrogram w/ 2 branches and 2 members at h = 7.57] | | | | |--leaf "Aubonne" | | | | `--leaf "Oron" | | | `--[dendrogram w/ 2 branches and 2 members at h = 6.35] | | | |--leaf "Cossonay" | | | `--leaf "Lavaux" | | `--leaf "Paysd'enhaut" | `--leaf "Echallens" `--[dendrogram w/ 2 branches and 16 members at h = 56.2] |--[dendrogram w/ 2 branches and 5 members at h = 20.4] | |--[dendrogram w/ 2 branches and 3 members at h = 12.7] | | |--leaf 
"Delemont" | | `--[dendrogram w/ 2 branches and 2 members at h = 9.4] | | |--leaf "Franches-Mnt" | | `--leaf "Porrentruy" | `--[dendrogram w/ 2 branches and 2 members at h = 13] | |--leaf "Gruyere" | `--leaf "Sarine" `--[dendrogram w/ 2 branches and 11 members at h = 30] |--[dendrogram w/ 2 branches and 5 members at h = 12.9] | |--[dendrogram w/ 2 branches and 4 members at h = 11.9] | | |--[dendrogram w/ 2 branches and 3 members at h = 8.45] | | | |--leaf "Broye" | | | `--[dendrogram w/ 2 branches and 2 members at h = 4.14] | | | |--leaf "Glane" | | | `--leaf "Veveyse" | | `--leaf "Sion" | `--leaf "Monthey" `--[dendrogram w/ 2 branches and 6 members at h = 16] |--[dendrogram w/ 2 branches and 4 members at h = 7.42] | |--[dendrogram w/ 2 branches and 3 members at h = 5.94] | | |--[dendrogram w/ 2 branches and 2 members at h = 2.05] | | | |--leaf "Conthey" | | | `--leaf "Sierre" | | `--leaf "Herens" | `--leaf "Entremont" `--[dendrogram w/ 2 branches and 2 members at h = 5.09] |--leaf "Martigwy" `--leaf "St Maurice" > cluster/tests/clara-ex.R0000644000175100001440000000244112422156313014732 0ustar hornikusers#### These are *NOT* compared with output in the released version of ### 'cluster' currently library(cluster) data(xclara) ## Try 100 times *different* random samples -- for reliability: nSim <- 100 nCl <- 3 # = no.classes ## unknown problem: this is still platform dependent to some extent: set.seed(107)# (reproducibility; somewhat favorable with "small iDoubt") cl <- replicate(nSim, clara(xclara, nCl, rngR = TRUE)$cluster) tcl <- apply(cl,1, tabulate, nbins = nCl) ## those that are not always in same cluster (5 out of 3000 for this seed): (iDoubt <- which(apply(tcl,2, function(n) all(n < nSim)))) if(FALSE) {# doExtras -- rrr <- lapply(1:128, function(iseed) { set.seed(iseed)# (reproducibility; somewhat favorable with "small iDoubt") cat(iseed, if(iseed %% 10 == 0)"\n") cl <- replicate(nSim, clara(xclara, nCl, rngR = TRUE)$cluster) tcl <- apply(cl,1, tabulate, 
nbins = nCl) which(apply(tcl,2, function(n) all(n < nSim))) }) ## compare with "true" -- are the "changers" only those with small sil.width? print(system.time(px <- pam(xclara,3)))# 1.84 on lynne(2013) }## doExtras if(length(iDoubt)) { # (not for all seeds) tabD <- tcl[,iDoubt, drop=FALSE] dimnames(tabD) <- list(cluster = paste(1:nCl), obs = format(iDoubt)) t(tabD) # how many times in which clusters } cluster/tests/silhouette-default.Rout.save0000644000175100001440000004453111626750550020550 0ustar hornikusers R Under development (unstable) (2011-08-28 r56813) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. 
> ## This came from a bug report on R-help by ge yreyt > ## Date: Mon, 9 Jun 2003 16:06:53 -0400 (EDT) > library(cluster) > if(FALSE) # manual testing + library(cluster, lib="~/R/Pkgs/cluster.Rcheck") > > data(iris) > > .proctime00 <- proc.time() > > mdist <- as.dist(1 - cor(t(iris[,1:4])))#dissimlarity > ## this is always the same: > hc <- diana(mdist, diss = TRUE, stand = FALSE) > > maxk <- 15 # at most 15 clusters > silh.wid <- numeric(maxk) # myind[k] := the silh.value for k clusters > silh.wid[1] <- NA # 1-cluster: silhouette not defined > > op <- par(mfrow = c(4,4), mar = .1+ c(2,1,2,1), mgp=c(1.5, .6,0)) > for(k in 2:maxk) { + cat("\n", k,":\n==\n") + k.gr <- cutree(as.hclust(hc), k = k) + cat("grouping table: "); print(table(k.gr)) + si <- silhouette(k.gr, mdist) + cat("silhouette:\n"); print(summary(si)) + plot(si, main = paste("k =",k), + col = 2:(k+1), do.n.k=FALSE, do.clus.stat=FALSE) + silh.wid[k] <- summary(si)$avg.width + ## === + } 2 : == grouping table: k.gr 1 2 50 100 silhouette: Silhouette of 150 units in 2 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 50 100 0.9829965 0.9362626 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 0.5884 0.9437 0.9611 0.9518 0.9815 0.9918 3 : == grouping table: k.gr 1 2 3 50 50 50 silhouette: Silhouette of 150 units in 3 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 50 50 50 0.9773277 0.6926798 0.7467236 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 0.03353 0.76940 0.86120 0.80560 0.97560 0.98920 4 : == grouping table: k.gr 1 2 3 4 35 15 50 50 silhouette: Silhouette of 150 units in 4 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 15 50 50 0.5653722 0.5226372 0.6926798 0.7467236 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 
0.03353 0.56620 0.75100 0.66400 0.84240 0.89390 5 : == grouping table: k.gr 1 2 3 4 5 35 15 29 21 50 silhouette: Silhouette of 150 units in 5 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 15 29 21 50 0.5653722 0.5226372 0.5776362 0.4625437 0.5296735 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.5404 0.3937 0.6252 0.5372 0.7392 0.8136 6 : == grouping table: k.gr 1 2 3 4 5 6 35 15 29 21 29 21 silhouette: Silhouette of 150 units in 6 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 15 29 21 29 21 0.5653722 0.5226372 0.5776362 0.3732981 0.3383135 0.5945444 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.1094 0.3351 0.5257 0.4968 0.6938 0.8136 7 : == grouping table: k.gr 1 2 3 4 5 6 7 35 14 1 29 21 29 21 silhouette: Silhouette of 150 units in 7 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 29 21 29 21 0.4165289 0.6671435 0.0000000 0.5776362 0.3732981 0.3383135 0.5945444 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.3264 0.3001 0.5234 0.4720 0.6970 0.8301 8 : == grouping table: k.gr 1 2 3 4 5 6 7 8 35 14 1 29 10 11 29 21 silhouette: Silhouette of 150 units in 8 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 29 10 11 29 21 0.4165289 0.6671435 0.0000000 0.4209012 0.6943265 0.7262601 0.2053018 0.5945444 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 
-0.6258 0.2576 0.5842 0.4633 0.7132 0.8887 9 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 35 14 1 26 10 11 3 29 21 silhouette: Silhouette of 150 units in 9 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 26 10 11 3 29 0.4165289 0.6671435 0.0000000 0.5318152 0.6673269 0.6944652 0.7957279 0.2053018 21 0.5945444 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.6258 0.3150 0.5896 0.4859 0.7263 0.8870 10 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 35 14 1 26 10 11 3 16 13 21 silhouette: Silhouette of 150 units in 10 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 26 10 11 3 16 0.4165289 0.6671435 0.0000000 0.5318152 0.6319149 0.6145837 0.7957279 0.4640123 13 21 0.6615431 0.4228530 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.5870 0.3535 0.6068 0.5208 0.7349 0.8803 11 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 35 14 1 26 10 11 3 16 13 11 10 silhouette: Silhouette of 150 units in 11 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 14 1 26 10 11 3 16 0.4165289 0.6671435 0.0000000 0.5318152 0.6319149 0.6145837 0.7957279 0.4064279 13 11 10 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.3264 0.3730 0.5984 0.5244 0.7302 0.8505 12 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 35 11 3 1 26 10 11 3 16 13 11 10 silhouette: Silhouette of 150 units in 12 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 35 11 3 1 26 10 11 3 0.2883758 0.7044155 0.4092330 0.0000000 0.5318152 0.6319149 0.6145837 0.7957279 16 13 11 10 0.4064279 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 
-0.6007 0.3395 0.5817 0.4921 0.7216 0.8700 13 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 28 11 3 7 1 26 10 11 3 16 13 11 10 silhouette: Silhouette of 150 units in 13 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 28 11 3 7 1 26 10 11 0.3783869 0.6827810 0.4092330 0.4285753 0.0000000 0.5318152 0.6319149 0.6145837 3 16 13 11 10 0.7957279 0.4064279 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.4013 0.3314 0.5704 0.5138 0.7274 0.8531 14 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 19 11 3 9 7 1 26 10 11 3 16 13 11 10 silhouette: Silhouette of 150 units in 14 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 19 11 3 9 7 1 26 10 0.5419530 0.6171802 0.3959926 0.4525348 0.1669077 0.0000000 0.5318152 0.6319149 11 3 16 13 11 10 0.6145837 0.7957279 0.4064279 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.5929 0.3795 0.5875 0.5217 0.7263 0.8505 15 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 19 11 3 9 7 1 18 10 11 8 3 16 13 11 10 silhouette: Silhouette of 150 units in 15 clusters from silhouette.default(x = k.gr, dist = mdist) : Cluster sizes and average silhouette widths: 19 11 3 9 7 1 18 10 0.5419530 0.6171802 0.3959926 0.4525348 0.1669077 0.0000000 0.6616381 0.5871805 11 8 3 16 13 11 10 0.5171407 0.6705138 0.7444822 0.4064279 0.5866228 0.4297258 0.6590274 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 
-0.5929 0.3859 0.6211 0.5335 0.7478 0.8551 > par(op) > > summary(si.p <- silhouette(50 - k.gr, mdist)) Silhouette of 150 units in 15 clusters from silhouette.default(x = 50 - k.gr, dist = mdist) : Cluster sizes, ids = (35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49), and average silhouette widths: 10 11 13 16 3 8 11 10 0.6590274 0.4297258 0.5866228 0.4064279 0.7444822 0.6705138 0.5171407 0.5871805 18 1 7 9 3 11 19 0.6616381 0.0000000 0.1669077 0.4525348 0.3959926 0.6171802 0.5419530 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.5929 0.3859 0.6211 0.5335 0.7478 0.8551 > stopifnot(identical(si.p[,3], si[,3]), + identical(si.p[, 1:2], 50 - si[, 1:2])) > > # the widths: > silh.wid [1] NA 0.9518406 0.8055770 0.6639850 0.5371742 0.4967654 0.4720384 [8] 0.4633064 0.4858965 0.5207776 0.5243911 0.4920638 0.5138220 0.5217026 [15] 0.5335255 > #select the number of k clusters with the largest si value : > (myk <- which.min(silh.wid)) # -> 8 (here) [1] 8 > > postscript(file="silhouette-ex.ps") > ## MM: plot to see how the decision is made > plot(silh.wid, type = 'b', col= "blue", xlab = "k") > axis(1, at=myk, col.axis= "red", font.axis= 2) > > ##--- PAM()'s silhouette should give same as silh*.default()! 
> Eq <- function(x,y, tol = 1e-12) x == y | abs(x - y) < tol * abs((x+y)/2) > > for(k in 2:40) { + cat("\n", k,":\n==\n") + p.k <- pam(mdist, k = k) + k.gr <- p.k$clustering + si.p <- silhouette(p.k) + si.g <- silhouette(k.gr, mdist) + ## since the obs.order may differ (within cluster): + si.g <- si.g[ as.integer(rownames(si.p)), ] + cat("grouping table: "); print(table(k.gr)) + if(!isTRUE(all.equal(c(si.g), c(si.p)))) { + cat("silhouettes differ:") + if(any(neq <- !Eq(si.g[,3], si.p[,3]))) { + cat("\n") + print( cbind(si.p[], si.g[,2:3])[ neq, ] ) + } else cat(" -- but not in col.3 !\n") + } + } 2 : == grouping table: k.gr 1 2 50 100 3 : == grouping table: k.gr 1 2 3 50 50 50 4 : == grouping table: k.gr 1 2 3 4 50 43 37 20 5 : == grouping table: k.gr 1 2 3 4 5 50 25 35 20 20 6 : == grouping table: k.gr 1 2 3 4 5 6 33 17 25 35 20 20 7 : == grouping table: k.gr 1 2 3 4 5 6 7 33 17 17 14 18 31 20 8 : == grouping table: k.gr 1 2 3 4 5 6 7 8 21 13 16 17 14 18 31 20 9 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 21 13 16 12 20 11 19 17 21 10 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 21 13 16 18 10 15 14 7 16 20 11 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 21 13 16 19 10 14 7 6 15 13 16 12 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 21 13 16 17 10 12 9 3 5 15 13 16 13 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 21 12 16 1 18 11 12 9 3 15 13 4 15 14 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 20 10 7 13 18 10 12 9 3 7 10 13 4 14 15 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 20 11 5 13 1 18 10 12 9 3 7 10 13 4 14 16 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 20 11 5 13 1 12 8 9 11 9 3 7 10 13 4 14 17 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 20 11 5 13 1 12 8 7 9 10 3 3 9 13 4 13 9 18 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 20 11 5 9 4 1 12 8 7 9 10 3 3 9 13 4 13 9 19 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 
16 17 18 19 20 11 5 9 4 1 10 8 8 9 8 3 3 9 13 3 4 13 9 20 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 20 11 5 9 4 1 10 8 8 9 8 3 3 9 12 3 4 6 9 8 21 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 20 11 5 9 4 1 10 8 8 7 8 3 3 7 11 3 4 6 9 8 5 22 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 9 11 5 9 11 4 1 10 8 8 7 8 3 3 7 11 3 4 6 9 8 5 23 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 9 11 5 9 11 4 1 10 8 8 7 8 3 3 7 11 3 4 6 8 8 5 1 24 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 15 10 5 10 3 3 3 1 10 8 8 7 8 3 3 7 11 3 4 6 8 8 5 1 25 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 8 4 5 9 11 7 2 3 1 10 8 8 7 8 3 3 7 11 3 4 6 8 8 5 1 26 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 5 9 11 7 2 3 1 10 8 8 7 8 3 3 7 7 3 4 6 8 8 4 5 1 27 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 5 9 11 7 2 3 1 10 8 7 7 8 3 2 7 7 3 2 4 6 8 8 4 5 27 1 28 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 4 9 11 7 2 3 1 1 10 8 7 7 8 3 2 7 7 3 2 4 6 8 8 4 27 28 5 1 29 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 4 9 11 7 2 3 1 1 10 8 7 7 8 2 2 7 7 3 2 1 4 6 8 8 27 28 29 4 5 1 30 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 11 10 6 3 2 3 1 1 1 10 8 7 7 8 2 2 7 7 3 2 1 4 6 8 27 28 29 30 8 4 5 1 31 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 8 4 11 10 6 3 2 3 1 1 1 10 8 7 7 8 2 2 7 7 3 2 1 4 6 7 27 28 29 30 31 7 4 5 1 2 32 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 10 10 6 2 2 3 1 1 1 10 8 7 7 8 2 
2 7 7 3 2 1 4 6 27 28 29 30 31 32 7 7 4 5 1 2 33 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 10 10 6 2 2 3 1 1 1 10 8 7 7 8 2 2 7 7 3 2 1 1 6 27 28 29 30 31 32 33 7 3 7 4 5 1 2 34 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 7 7 8 2 2 7 7 3 2 1 1 27 28 29 30 31 32 33 34 6 7 3 7 4 5 1 2 35 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 7 7 8 2 2 5 7 3 2 1 1 27 28 29 30 31 32 33 34 35 6 5 3 7 4 4 5 1 2 36 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 7 6 8 2 2 5 7 1 3 2 1 27 28 29 30 31 32 33 34 35 36 1 6 5 3 7 4 4 5 1 2 37 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 3 5 6 8 2 2 5 7 1 3 2 27 28 29 30 31 32 33 34 35 36 37 1 1 6 5 3 7 4 5 1 3 2 38 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 3 5 6 5 2 2 5 3 7 1 3 27 28 29 30 31 32 33 34 35 36 37 38 2 1 1 6 5 3 7 4 5 1 3 2 39 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 7 4 3 8 9 6 2 3 3 1 1 2 1 7 3 8 3 5 6 5 2 2 5 3 7 1 27 28 29 30 31 32 33 34 35 36 37 38 39 3 2 1 1 6 5 3 7 4 5 1 3 2 40 : == grouping table: k.gr 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 5 4 3 7 10 6 2 2 3 3 1 1 2 1 7 3 8 3 5 6 5 2 2 5 3 7 27 28 29 30 31 32 33 34 35 36 37 38 39 40 1 3 2 1 1 6 5 3 7 4 5 1 3 2 > > > ## "pathological" case where a_i == b_i == 0 : > D6 <- structure(c(0, 0, 0, 0.4, 1, 0.05, 1, 1, 0, 1, 1, 0, 0.25, 1, 1), + Labels = LETTERS[1:6], Size = 6, call = as.name("manually"), + class = "dist", Diag = FALSE, Upper = FALSE) > D6 A B C D E B 0.00 C 0.00 0.05 D 0.00 1.00 1.00 E 0.40 1.00 1.00 0.25 F 1.00 0.00 
0.00 1.00 1.00 > kl6 <- c(1,1, 2,2, 3,3) > silhouette(kl6, D6)# had one NaN cluster neighbor sil_width [1,] 1 2 0.000 [2,] 1 3 1.000 [3,] 2 1 -0.975 [4,] 2 1 -0.500 [5,] 3 2 -0.375 [6,] 3 1 -0.500 attr(,"Ordered") [1] FALSE attr(,"call") silhouette.default(x = kl6, dist = D6) attr(,"class") [1] "silhouette" > summary(silhouette(kl6, D6)) Silhouette of 6 units in 3 clusters from silhouette.default(x = kl6, dist = D6) : Cluster sizes and average silhouette widths: 2 2 2 0.5000 -0.7375 -0.4375 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. -0.97500 -0.50000 -0.43750 -0.22500 -0.09375 1.00000 > plot(silhouette(kl6, D6))# gives error in earlier cluster versions > > dev.off() pdf 2 > > ## Last Line: > cat('Time elapsed: ', proc.time() - .proctime00,'\n') Time elapsed: 3.453 0.03 3.503 0 0 > > cluster/tests/clusplot-out.Rout.save0000644000175100001440000000616111460054255017403 0ustar hornikusers R version 2.12.0 Patched (2010-10-19 r53372) Copyright (C) 2010 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(cluster) > > ### clusplot() & pam() RESULT checking ... 
> > ## plotting votes.diss(dissimilarity) in a bivariate plot and > ## partitioning into 2 clusters > data(votes.repub) > votes.diss <- daisy(votes.repub) > for(k in 2:4) { + votes.clus <- pam(votes.diss, k, diss = TRUE)$clustering + print(clusplot(votes.diss, votes.clus, diss = TRUE, shade = TRUE)) + } $Distances [,1] [,2] [1,] 0.0000 154.8601 [2,] 154.8601 0.0000 $Shading [1] 22.12103 20.87897 $Distances [,1] [,2] [,3] [1,] 0.0000 NA 140.8008 [2,] NA 0 NA [3,] 140.8008 NA 0.0000 $Shading [1] 25.602967 5.292663 15.104370 $Distances [,1] [,2] [,3] [,4] [1,] 0.0000 NA 117.2287 280.7259 [2,] NA 0 NA NA [3,] 117.2287 NA 0.0000 NA [4,] 280.7259 NA NA 0.0000 $Shading [1] 15.431339 3.980743 10.145454 19.442464 > > ## plotting iris (dataframe) in a 2-dimensional plot and partitioning > ## into 3 clusters. > data(iris) > iris.x <- iris[, 1:4] > > for(k in 2:5) + print(clusplot(iris.x, pam(iris.x, k)$clustering, diss = FALSE)) $Distances [,1] [,2] [1,] 0.0000000 0.5452161 [2,] 0.5452161 0.0000000 $Shading [1] 18.93861 24.06139 $Distances [,1] [,2] [,3] [1,] 0.000000 1.433071 2.851715 [2,] 1.433071 0.000000 NA [3,] 2.851715 NA 0.000000 $Shading [1] 18.987588 17.166940 9.845472 $Distances [,1] [,2] [,3] [,4] [1,] 0.000000 2.24157 1.6340329 3.0945887 [2,] 2.241570 0.00000 NA NA [3,] 1.634033 NA 0.0000000 0.9461858 [4,] 3.094589 NA 0.9461858 0.0000000 $Shading [1] 12.881766 14.912379 13.652381 7.553474 $Distances [,1] [,2] [,3] [,4] [,5] [1,] 0.000000 1.9899160 1.552387 3.11516148 3.96536391 [2,] 1.989916 0.0000000 NA NA 0.94713417 [3,] 1.552387 NA 0.000000 1.17348334 2.22769309 [4,] 3.115161 NA 1.173483 0.00000000 0.04539385 [5,] 3.965364 0.9471342 2.227693 0.04539385 0.00000000 $Shading [1] 10.369738 11.560147 10.088431 14.857590 5.124093 > > > .Random.seed <- c(0L,rep(7654L,3)) > ## generate 25 objects, divided into 2 clusters. 
> x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)), + cbind(rnorm(15,5,0.5), rnorm(15,5,0.5))) > print.default(clusplot(px2 <- pam(x, 2))) $Distances [,1] [,2] [1,] 0.000000 5.516876 [2,] 5.516876 0.000000 $Shading [1] 20.18314 22.81686 > > clusplot(px2, labels = 2, col.p = 1 + px2$clustering) > cluster/tests/mona.R0000644000175100001440000000075010473136753014203 0ustar hornikuserslibrary(cluster) data(animals) (mani <- mona(animals)) str(mani) if(require(MASS)) { if(R.version$major != "1" || as.numeric(R.version$minor) >= 7) RNGversion("1.6") set.seed(253) n <- 512; p <- 3 Sig <- diag(p); Sig[] <- 0.8 ^ abs(col(Sig) - row(Sig)) x3 <- mvrnorm(n, rep(0,p), Sig) >= 0 x <- cbind(x3, rbinom(n, size=1, prob = 1/2)) print(sapply(as.data.frame(x), table)) mx <- mona(x) str(mx) print(lapply(mx[c(1,3,4)], table)) } cluster/tests/agnes-ex.Rout.save0000644000175100001440000006306612422156313016444 0ustar hornikusers R version 3.1.2 beta (2014-10-21 r66834) -- "Pumpkin Helmet" Copyright (C) 2014 The R Foundation for Statistical Computing Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(cluster) > options(digits = 6) > data(votes.repub) > > ## From Matrix' test-tools-1.R : > showProc.time <- local({ ## function + 'pct' variable + pct <- proc.time() + function(final="\n") { ## CPU elapsed __since last called__ + ot <- pct ; pct <<- proc.time() + ## 'Time ..' *not* to be translated: tools::Rdiff() skips its lines! 
+ cat('Time elapsed: ', (pct - ot)[1:3], final) + } + }) > > agn1 <- agnes(votes.repub, metric = "manhattan", stand = TRUE) > summary(agn1) Object of class 'agnes' from call: agnes(x = votes.repub, metric = "manhattan", stand = TRUE) Agglomerative coefficient: 0.797756 Order of objects: [1] Alabama Georgia Arkansas Louisiana Mississippi [6] South Carolina Alaska Vermont Arizona Montana [11] Nevada Colorado Idaho Wyoming Utah [16] California Oregon Washington Minnesota Connecticut [21] New York New Jersey Illinois Ohio Indiana [26] Michigan Pennsylvania New Hampshire Wisconsin Delaware [31] Kentucky Maryland Missouri New Mexico West Virginia [36] Iowa South Dakota North Dakota Kansas Nebraska [41] Maine Massachusetts Rhode Island Florida North Carolina [46] Tennessee Virginia Oklahoma Hawaii Texas Merge: [,1] [,2] [1,] -7 -32 [2,] -13 -35 [3,] -12 -50 [4,] 1 -30 [5,] 2 -14 [6,] -26 -28 [7,] -5 -37 [8,] -15 -41 [9,] -22 -38 [10,] -25 -31 [11,] 7 -47 [12,] -21 -39 [13,] -16 -27 [14,] 4 5 [15,] -42 -46 [16,] -20 10 [17,] 14 9 [18,] -3 6 [19,] -6 3 [20,] -33 15 [21,] 17 -29 [22,] -17 16 [23,] 8 -34 [24,] 21 -49 [25,] 22 -48 [26,] -8 25 [27,] 19 -44 [28,] 11 -23 [29,] 28 24 [30,] -11 -43 [31,] 18 27 [32,] 23 13 [33,] 29 26 [34,] 20 -36 [35,] -1 -10 [36,] 32 -19 [37,] 31 33 [38,] -9 34 [39,] 37 36 [40,] 35 -4 [41,] -2 -45 [42,] 40 -18 [43,] -24 -40 [44,] 39 12 [45,] 44 38 [46,] 41 45 [47,] 42 43 [48,] 46 30 [49,] 47 48 Height: [1] 27.36345 31.15453 35.61832 51.44421 35.69152 87.45523 31.58222 47.53682 [9] 16.34184 11.49397 22.11426 16.35662 10.46294 19.03961 28.41137 11.70132 [17] 12.72838 19.07671 20.90246 8.38200 11.10094 12.92659 9.23004 11.37867 [25] 15.97442 12.70819 16.91515 17.74499 24.83533 18.78225 17.03525 15.77893 [33] 12.71848 18.52818 30.74557 12.55524 17.14634 22.33846 12.80419 27.38835 [41] 37.23685 12.79160 38.76377 29.38432 16.63215 14.75762 25.59605 53.03627 [49] 21.07684 1225 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 
8.382 25.540 34.510 45.060 56.020 167.600 Metric : manhattan Number of objects : 50 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" "data" > Dvr <- daisy(votes.repub) > agn2 <- agnes(Dvr, method = "complete") > summary(agn2) Object of class 'agnes' from call: agnes(x = Dvr, method = "complete") Agglomerative coefficient: 0.88084 Order of objects: [1] Alabama Georgia Louisiana Arkansas Florida [6] Texas Mississippi South Carolina Alaska Michigan [11] Connecticut New York New Hampshire Indiana Ohio [16] Illinois New Jersey Pennsylvania Minnesota North Dakota [21] Wisconsin Iowa South Dakota Kansas Nebraska [26] Arizona Nevada Montana Oklahoma Colorado [31] Idaho Wyoming Utah California Oregon [36] Washington Missouri New Mexico West Virginia Delaware [41] Kentucky Maryland North Carolina Tennessee Virginia [46] Hawaii Maine Massachusetts Rhode Island Vermont Merge: [,1] [,2] [1,] -12 -50 [2,] -7 -32 [3,] -14 -35 [4,] -13 -30 [5,] -25 -31 [6,] -37 -47 [7,] -21 -39 [8,] -3 -28 [9,] 4 -38 [10,] -16 -27 [11,] -15 -41 [12,] 8 -26 [13,] -2 -22 [14,] -33 -42 [15,] 14 -46 [16,] 1 -44 [17,] -11 -19 [18,] 2 -29 [19,] -5 6 [20,] -17 -20 [21,] -34 -49 [22,] 5 -48 [23,] 18 3 [24,] 11 10 [25,] 23 9 [26,] -23 21 [27,] -8 20 [28,] 12 -36 [29,] -6 16 [30,] 13 25 [31,] 28 29 [32,] -1 -10 [33,] 19 22 [34,] 17 7 [35,] -4 -9 [36,] 30 26 [37,] 35 -43 [38,] 32 -18 [39,] -24 -40 [40,] 36 24 [41,] 27 15 [42,] 31 33 [43,] 38 37 [44,] 40 42 [45,] 34 -45 [46,] 43 39 [47,] 44 41 [48,] 47 45 [49,] 46 48 Height: [1] 48.2397 60.8984 72.9221 56.1363 58.8227 116.7048 63.0951 281.9508 [9] 28.1437 47.1690 19.4218 32.9438 36.7643 20.2258 39.1728 20.8792 [17] 25.3229 56.3813 42.2230 33.6978 64.5254 26.1547 37.4564 25.9221 [25] 80.4894 23.4206 27.8273 43.4492 48.0483 43.7055 17.1992 31.1988 [33] 70.4868 33.2328 22.1831 54.3057 21.1413 35.1129 121.4022 43.3829 [41] 33.4744 66.7591 29.5099 30.1541 178.4119 32.7611 55.3633 22.6334 [49] 83.1040 1225 
dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 17.20 48.34 64.68 82.23 105.50 282.00 Metric : euclidean Number of objects : 50 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" > ## almost same: > (ag2. <- agnes(Dvr, method= "complete", keep.diss=FALSE)) Call: agnes(x = Dvr, method = "complete", keep.diss = FALSE) Agglomerative coefficient: 0.88084 Order of objects: [1] Alabama Georgia Louisiana Arkansas Florida [6] Texas Mississippi South Carolina Alaska Michigan [11] Connecticut New York New Hampshire Indiana Ohio [16] Illinois New Jersey Pennsylvania Minnesota North Dakota [21] Wisconsin Iowa South Dakota Kansas Nebraska [26] Arizona Nevada Montana Oklahoma Colorado [31] Idaho Wyoming Utah California Oregon [36] Washington Missouri New Mexico West Virginia Delaware [41] Kentucky Maryland North Carolina Tennessee Virginia [46] Hawaii Maine Massachusetts Rhode Island Vermont Height (summary): Min. 1st Qu. Median Mean 3rd Qu. Max. 17.2 28.1 39.2 52.3 58.8 282.0 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" > ag22 <- agnes(votes.repub, method= "complete", keep.diss=FALSE,keep.data=FALSE) > stopifnot(identical(agn2[-5:-6], ag2.[-5:-6]), + identical(Dvr, daisy(votes.repub)), # DUP=FALSE (!) + identical(ag2.[-6], ag22[-6]) + ) > > data(agriculture) > summary(agnes(agriculture)) Object of class 'agnes' from call: agnes(x = agriculture) Agglomerative coefficient: 0.781893 Order of objects: [1] B NL D F UK DK L I GR P E IRL Merge: [,1] [,2] [1,] -1 -10 [2,] -2 -9 [3,] 1 -3 [4,] 3 -6 [5,] -5 -7 [6,] 4 -12 [7,] 6 2 [8,] -4 -11 [9,] 7 -8 [10,] 8 5 [11,] 9 10 Height: [1] 1.64924 2.24836 2.76918 4.02677 4.78835 2.22036 5.29409 14.77963 [9] 5.16236 8.55075 3.14006 66 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 
1.649 4.357 7.987 9.594 13.250 24.040 Metric : euclidean Number of objects : 12 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" "data" > > data(ruspini) > summary(ar0 <- agnes(ruspini, keep.diss=FALSE, keep.data=FALSE)) Object of class 'agnes' from call: agnes(x = ruspini, keep.diss = FALSE, keep.data = FALSE) Agglomerative coefficient: 0.947954 Order of objects: [1] 1 2 3 5 4 6 8 7 9 10 14 15 17 16 18 19 11 12 13 20 61 62 66 63 64 [26] 68 65 67 69 70 71 72 75 73 74 21 22 23 24 27 28 29 30 25 26 32 35 31 36 39 [51] 40 33 34 37 38 41 42 43 44 45 49 51 53 50 54 52 55 56 57 59 60 58 46 47 48 Merge: [,1] [,2] [1,] -18 -19 [2,] -55 -56 [3,] -27 -28 [4,] -49 -51 [5,] -33 -34 [6,] -23 -24 [7,] -67 -69 [8,] -59 -60 [9,] -29 -30 [10,] -36 -39 [11,] -32 -35 [12,] -50 -54 [13,] -25 -26 [14,] -16 1 [15,] -70 -71 [16,] -64 -68 [17,] -37 -38 [18,] 12 -52 [19,] -62 -66 [20,] -12 -13 [21,] -9 -10 [22,] -42 -43 [23,] -15 -17 [24,] -47 -48 [25,] -21 -22 [26,] 7 15 [27,] 2 -57 [28,] 4 -53 [29,] 10 -40 [30,] 3 9 [31,] -73 -74 [32,] -72 -75 [33,] -11 20 [34,] 13 11 [35,] -6 -8 [36,] -14 23 [37,] -2 -3 [38,] -65 26 [39,] 5 17 [40,] 25 6 [41,] 36 14 [42,] 34 -31 [43,] -4 35 [44,] 28 18 [45,] 27 8 [46,] 19 -63 [47,] -46 24 [48,] -1 37 [49,] 16 38 [50,] 40 30 [51,] 42 29 [52,] 33 -20 [53,] 49 32 [54,] 51 39 [55,] 21 41 [56,] 48 -5 [57,] 45 -58 [58,] 53 31 [59,] 43 -7 [60,] 50 54 [61,] 44 57 [62,] -41 22 [63,] -61 46 [64,] 55 52 [65,] 63 58 [66,] -44 -45 [67,] 59 64 [68,] 56 67 [69,] 66 61 [70,] 60 62 [71,] 69 47 [72,] 70 71 [73,] 68 65 [74,] 73 72 Height: [1] 9.26758 6.40312 12.13789 22.37868 7.63441 6.32456 14.58991 [8] 21.63544 4.12311 12.07902 6.36396 4.24264 7.23741 3.56155 [15] 1.41421 16.38921 5.85486 4.12311 10.69547 67.75052 15.48443 [22] 4.12311 8.94386 17.00500 3.60555 9.53375 6.46443 2.82843 [29] 4.48680 3.60555 10.91541 5.83095 14.34411 5.65685 101.14200 [36] 4.47214 6.98022 2.23607 9.56136 2.00000 5.61339 2.82843 [43] 14.95692 
3.16228 6.19728 3.00000 7.28356 9.97147 3.00000 [50] 5.47542 11.07404 2.23607 6.60456 3.60555 24.90532 15.45463 [57] 4.24264 64.42555 17.02939 22.56493 2.23607 5.22383 8.28122 [64] 3.16228 3.81721 15.20808 2.00000 5.19258 8.51123 2.82843 [71] 12.62990 34.72475 9.14005 4.47214 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" > summary(ar1 <- agnes(ruspini, metric = "manhattan")) Object of class 'agnes' from call: agnes(x = ruspini, metric = "manhattan") Agglomerative coefficient: 0.946667 Order of objects: [1] 1 2 3 5 4 6 8 7 9 10 14 16 18 19 15 17 11 12 13 20 61 62 66 63 73 [26] 74 64 68 65 67 69 70 71 72 75 21 22 23 24 27 28 29 30 25 26 32 35 31 36 39 [51] 40 33 34 37 38 41 42 43 44 45 49 51 53 50 54 52 55 56 57 59 60 58 46 47 48 Merge: [,1] [,2] [1,] -55 -56 [2,] -27 -28 [3,] -18 -19 [4,] -49 -51 [5,] -36 -39 [6,] -33 -34 [7,] -32 -35 [8,] -23 -24 [9,] -67 -69 [10,] -59 -60 [11,] -50 -54 [12,] -29 -30 [13,] -25 -26 [14,] -16 3 [15,] -70 -71 [16,] -64 -68 [17,] -62 -66 [18,] 11 -52 [19,] -37 -38 [20,] -12 -13 [21,] -9 -10 [22,] 9 15 [23,] 1 -57 [24,] -47 -48 [25,] -42 -43 [26,] -21 -22 [27,] -15 -17 [28,] 4 -53 [29,] 2 12 [30,] 5 -40 [31,] 6 19 [32,] 13 7 [33,] -11 20 [34,] -73 -74 [35,] -72 -75 [36,] -6 -8 [37,] -65 22 [38,] -14 14 [39,] -2 -3 [40,] 32 -31 [41,] 38 27 [42,] 26 8 [43,] 28 18 [44,] -46 24 [45,] -4 36 [46,] 23 10 [47,] -1 39 [48,] 42 29 [49,] 33 -20 [50,] 37 35 [51,] 40 30 [52,] 17 -63 [53,] 16 50 [54,] 51 31 [55,] 46 -58 [56,] 21 41 [57,] 47 -5 [58,] 45 -7 [59,] 52 34 [60,] -44 -45 [61,] 48 54 [62,] 43 55 [63,] 59 53 [64,] 56 49 [65,] -41 25 [66,] -61 63 [67,] 57 58 [68,] 67 64 [69,] 60 62 [70,] 61 65 [71,] 69 44 [72,] 70 71 [73,] 68 66 [74,] 73 72 Height: [1] 11.50000 9.00000 16.00000 26.25000 10.00000 8.00000 16.66667 [8] 28.70833 5.00000 15.50000 8.66667 4.00000 2.00000 9.25000 [15] 6.00000 20.50000 7.50000 5.00000 12.33333 94.33333 22.78571 [22] 5.00000 12.50000 18.00000 8.00000 20.20000 5.00000 
13.78571 [29] 8.25000 4.00000 5.50000 5.00000 12.40000 8.00000 125.71357 [36] 6.00000 9.50000 3.00000 11.87500 2.00000 7.00000 4.00000 [43] 18.72917 4.00000 7.50000 3.00000 9.25000 12.40000 3.00000 [50] 7.50000 14.43750 3.00000 7.50000 5.00000 32.38333 21.00000 [57] 6.00000 85.49616 18.00000 28.75000 3.00000 6.50000 9.55556 [64] 4.00000 5.00000 19.61111 2.00000 6.00000 11.00000 4.00000 [71] 15.40000 47.02381 10.00000 6.00000 2775 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 2.00 52.50 97.00 90.95 128.00 187.00 Metric : manhattan Number of objects : 75 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" "data" > str(ar1) List of 9 $ order : int [1:75] 1 2 3 5 4 6 8 7 9 10 ... $ height : num [1:74] 11.5 9 16 26.2 10 ... $ ac : num 0.947 $ merge : int [1:74, 1:2] -55 -27 -18 -49 -36 -33 -32 -23 -67 -59 ... $ diss :Classes 'dissimilarity', 'dist' atomic [1:2775] 11 12 29 13 25 43 33 22 27 39 ... .. ..- attr(*, "Size")= int 75 .. ..- attr(*, "Metric")= chr "manhattan" .. ..- attr(*, "Labels")= chr [1:75] "1" "2" "3" "4" ... $ call : language agnes(x = ruspini, metric = "manhattan") $ method : chr "average" $ order.lab: chr [1:75] "1" "2" "3" "5" ... $ data : num [1:75, 1:2] 4 5 10 9 13 13 12 15 18 19 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:75] "1" "2" "3" "4" ... .. 
..$ : chr [1:2] "x" "y" - attr(*, "class")= chr [1:2] "agnes" "twins" > > showProc.time() Time elapsed: 0.049 0.001 0.05 > > summary(ar2 <- agnes(ruspini, metric="manhattan", method = "weighted")) Object of class 'agnes' from call: agnes(x = ruspini, metric = "manhattan", method = "weighted") Agglomerative coefficient: 0.942387 Order of objects: [1] 1 2 3 5 9 10 14 16 18 19 15 17 4 6 8 7 11 12 13 20 61 64 68 65 67 [26] 69 70 71 62 66 63 72 75 73 74 21 22 23 24 27 28 29 30 25 26 32 35 31 36 39 [51] 40 33 34 37 38 41 42 43 44 45 49 51 53 50 54 52 55 56 57 59 60 58 46 47 48 Merge: [,1] [,2] [1,] -55 -56 [2,] -27 -28 [3,] -18 -19 [4,] -49 -51 [5,] -36 -39 [6,] -33 -34 [7,] -32 -35 [8,] -23 -24 [9,] -67 -69 [10,] -59 -60 [11,] -50 -54 [12,] -29 -30 [13,] -25 -26 [14,] -16 3 [15,] -70 -71 [16,] -64 -68 [17,] -62 -66 [18,] 11 -52 [19,] -37 -38 [20,] -12 -13 [21,] -9 -10 [22,] 9 15 [23,] 1 -57 [24,] -47 -48 [25,] -42 -43 [26,] -21 -22 [27,] -15 -17 [28,] 4 -53 [29,] 2 12 [30,] 5 -40 [31,] 6 19 [32,] 13 7 [33,] -11 20 [34,] -73 -74 [35,] -72 -75 [36,] -14 14 [37,] -6 -8 [38,] -65 22 [39,] 36 27 [40,] -2 -3 [41,] 32 -31 [42,] 28 18 [43,] 26 8 [44,] -46 24 [45,] -4 37 [46,] 23 10 [47,] -1 40 [48,] 16 38 [49,] 43 29 [50,] 17 -63 [51,] 41 30 [52,] 33 -20 [53,] 35 34 [54,] 46 -58 [55,] 47 -5 [56,] 51 31 [57,] 21 39 [58,] 45 -7 [59,] -44 -45 [60,] -61 48 [61,] 42 54 [62,] 49 56 [63,] -41 25 [64,] 55 57 [65,] 60 50 [66,] 65 53 [67,] 58 52 [68,] 59 61 [69,] 64 67 [70,] 62 63 [71,] 68 44 [72,] 70 71 [73,] 69 66 [74,] 73 72 Height: [1] 11.5000 9.0000 15.2500 21.7734 5.0000 15.8750 8.0000 4.0000 [9] 2.0000 9.0000 6.0000 32.1172 10.0000 8.0000 16.0000 27.9062 [17] 7.5000 5.0000 13.2500 97.9766 18.3125 5.0000 11.8750 8.2500 [25] 4.0000 5.5000 5.0000 22.3438 5.0000 12.5000 23.7812 8.0000 [33] 14.5000 8.0000 114.9764 6.0000 9.5000 3.0000 11.8750 2.0000 [41] 7.0000 4.0000 20.2031 4.0000 7.5000 3.0000 9.2500 12.6250 [49] 3.0000 7.5000 15.6875 3.0000 7.5000 5.0000 33.5469 21.0000 [57] 6.0000 
69.4453 18.0000 28.4375 3.0000 6.5000 9.5000 4.0000 [65] 5.0000 19.1250 2.0000 6.0000 10.5000 4.0000 15.0000 41.5312 [73] 10.0000 6.0000 2775 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 2.00 52.50 97.00 90.95 128.00 187.00 Metric : manhattan Number of objects : 75 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" "data" > print (ar3 <- agnes(ruspini, metric="manhattan", method = "flexible", + par.meth = 0.5)) Call: agnes(x = ruspini, metric = "manhattan", method = "flexible", par.method = 0.5) Agglomerative coefficient: 0.942387 Order of objects: [1] 1 2 3 5 9 10 14 16 18 19 15 17 4 6 8 7 11 12 13 20 61 64 68 65 67 [26] 69 70 71 62 66 63 72 75 73 74 21 22 23 24 27 28 29 30 25 26 32 35 31 36 39 [51] 40 33 34 37 38 41 42 43 44 45 49 51 53 50 54 52 55 56 57 59 60 58 46 47 48 Height (summary): Min. 1st Qu. Median Mean 3rd Qu. Max. 2.00 5.00 8.12 14.20 15.60 115.00 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" "data" > stopifnot(all.equal(ar2[1:4], ar3[1:4], tol=1e-12)) > > showProc.time() Time elapsed: 0.006 0 0.006 > > ## Small example, testing "flexible" vs "single" > i8 <- -c(1:2, 9:10) > dim(agr8 <- agriculture[i8, ]) [1] 8 2 > i5 <- -c(1:2, 8:12) > dim(agr5 <- agriculture[i5, ]) [1] 5 2 > > > chk <- function(d, method=c("single", "complete", "weighted"), + trace.lev = 1, + iC = -(6:7), # <- not using 'call' and 'method' for comparisons + doplot = FALSE, tol = 1e-12) + { + if(!inherits(d, "dist")) d <- daisy(d, "manhattan") + method <- match.arg(method) + par.meth <- list("single" = c(.5, .5, 0, -.5), + "complete"= c(.5, .5, 0, +.5), + "weighted"= c(0.5)) + a.s <- agnes(d, method=method, trace.lev=trace.lev) + ## From theory, this should give the same, but it does not --- why ??? 
+ a.f <- agnes(d, method="flex", par.method = par.meth[[method]], trace.lev=trace.lev) + + if(doplot) { + op <- par(mfrow = c(2,2), mgp = c(1.6, 0.6, 0), mar = .1 + c(4,4,2,1)) + on.exit(par(op)) + plot(a.s) + plot(a.f) + } + structure(all.equal(a.s[iC], a.f[iC], tolerance = tol), + fits = list(s = a.s, f = a.f)) + } > > chk(agr5, trace = 3) C agnes(n=5, method = 2, ..): 4 merging steps nmerge=0, j=2, d_min = D(1,4) = 3.40000; -> (-1,-4); last=4; upd(n,b); old D(A, j), D(B, j), j=2 = (31.5,28.1); new D(A', 2) = 28.1 old D(A, j), D(B, j), j=3 = (14.7,11.3); new D(A', 3) = 11.3 old D(A, j), D(B, j), j=5 = (18.3,14.9); new D(A', 5) = 14.9 --> size(A_new)= 2 nmerge=1, j=2, d_min = D(3,5) = 3.60000; -> (-3,-5); last=5; old D(A, j), D(B, j), j=1 = (11.3,14.9); new D(A', 1) = 11.3 old D(A, j), D(B, j), j=2 = (16.8,13.2); new D(A', 2) = 13.2 --> size(A_new)= 2 nmerge=2, j=2, d_min = D(1,3) = 11.3000; -> (1,2); last=4; upd(n,b); old D(A, j), D(B, j), j=2 = (28.1,13.2); new D(A', 2) = 13.2 --> size(A_new)= 4 nmerge=3, j=2, d_min = D(1,2) = 13.2000; -> (3,-2); last=5; --> size(A_new)= 5 C agnes(n=5, method = 6, ..): |par| = 4, alpha[1:4] = (0.5,0.5,0,-0.5); 4 merging steps nmerge=0, j=2, d_min = D(1,4) = 3.40000; -> (-1,-4); last=4; upd(n,b); old D(A, j), D(B, j), j=2 = (31.5,28.1); new D(A', 2) = 28.1 old D(A, j), D(B, j), j=3 = (14.7,11.3); new D(A', 3) = 11.3 old D(A, j), D(B, j), j=5 = (18.3,14.9); new D(A', 5) = 14.9 --> size(A_new)= 2 nmerge=1, j=2, d_min = D(3,5) = 3.60000; -> (-3,-5); last=5; old D(A, j), D(B, j), j=1 = (11.3,14.9); new D(A', 1) = 11.3 old D(A, j), D(B, j), j=2 = (16.8,13.2); new D(A', 2) = 13.2 --> size(A_new)= 2 nmerge=2, j=2, d_min = D(1,3) = 11.3000; -> (1,2); last=4; upd(n,b); old D(A, j), D(B, j), j=2 = (28.1,13.2); new D(A', 2) = 13.2 --> size(A_new)= 4 nmerge=3, j=2, d_min = D(1,2) = 13.2000; -> (3,-2); last=5; --> size(A_new)= 5 [1] TRUE attr(,"fits") attr(,"fits")$s Call: agnes(x = d, method = method, trace.lev = trace.lev) Agglomerative 
coefficient: 0.587879 Order of objects: [1] D F E IRL GR Height (summary): Min. 1st Qu. Median Mean 3rd Qu. Max. 3.40 3.55 7.45 7.88 11.80 13.20 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" attr(,"fits")$f Call: agnes(x = d, method = "flex", par.method = par.meth[[method]], trace.lev = trace.lev) Agglomerative coefficient: 0.587879 Order of objects: [1] D F E IRL GR Height (summary): Min. 1st Qu. Median Mean 3rd Qu. Max. 3.40 3.55 7.45 7.87 11.80 13.20 Available components: [1] "order" "height" "ac" "merge" "diss" "call" [7] "method" "order.lab" > > stopifnot(chk(agr5), chk(agr5, "complete", trace = 2), chk(agr5, "weighted"), + chk(agr8), chk(agr8, "complete"), chk(agr8, "weighted", trace.lev=2), + chk(agriculture), chk(agriculture, "complete"), + chk(ruspini), chk(ruspini, "complete"), chk(ruspini, "weighted")) C agnes(n=5, method = 2, ..): 4 merging steps C agnes(n=5, method = 6, ..): |par| = 4, alpha[1:4] = (0.5,0.5,0,-0.5); 4 merging steps C agnes(n=5, method = 3, ..): 4 merging steps nmerge=0, j=2, d_min = D(1,4) = 3.40000; last=4; upd(n,b); size(A_new)= 2 nmerge=1, j=2, d_min = D(3,5) = 3.60000; last=5; size(A_new)= 2 nmerge=2, j=2, d_min = D(2,3) = 16.8000; last=4; size(A_new)= 3 nmerge=3, j=2, d_min = D(1,2) = 31.5000; last=3; size(A_new)= 5 C agnes(n=5, method = 6, ..): |par| = 4, alpha[1:4] = (0.5,0.5,0,0.5); 4 merging steps nmerge=0, j=2, d_min = D(1,4) = 3.40000; last=4; upd(n,b); size(A_new)= 2 nmerge=1, j=2, d_min = D(3,5) = 3.60000; last=5; size(A_new)= 2 nmerge=2, j=2, d_min = D(2,3) = 16.8000; last=4; size(A_new)= 3 nmerge=3, j=2, d_min = D(1,2) = 31.5000; last=3; size(A_new)= 5 C agnes(n=5, method = 5, ..): 4 merging steps C agnes(n=5, method = 6, ..): 4 merging steps C agnes(n=8, method = 2, ..): 7 merging steps C agnes(n=8, method = 6, ..): |par| = 4, alpha[1:4] = (0.5,0.5,0,-0.5); 7 merging steps C agnes(n=8, method = 3, ..): 7 merging steps C agnes(n=8, method = 6, ..): |par| = 4, alpha[1:4] = 
(0.5,0.5,0,0.5); 7 merging steps C agnes(n=8, method = 5, ..): 7 merging steps nmerge=0, j=2, d_min = D(1,4) = 3.40000; last=4; upd(n,b); size(A_new)= 2 nmerge=1, j=2, d_min = D(3,5) = 3.60000; last=5; size(A_new)= 2 nmerge=2, j=2, d_min = D(1,6) = 5.40000; last=6; upd(n,b); size(A_new)= 3 nmerge=3, j=2, d_min = D(2,7) = 6.70000; last=7; upd(n,b); size(A_new)= 2 nmerge=4, j=2, d_min = D(1,8) = 7.75000; last=8; upd(n,b); size(A_new)= 4 nmerge=5, j=2, d_min = D(2,3) = 11.6500; last=7; size(A_new)= 4 nmerge=6, j=2, d_min = D(1,2) = 18.3750; last=5; size(A_new)= 8 C agnes(n=8, method = 6, ..): 7 merging steps nmerge=0, j=2, d_min = D(1,4) = 3.40000; last=4; upd(n,b); size(A_new)= 2 nmerge=1, j=2, d_min = D(3,5) = 3.60000; last=5; size(A_new)= 2 nmerge=2, j=2, d_min = D(1,6) = 5.40000; last=6; upd(n,b); size(A_new)= 3 nmerge=3, j=2, d_min = D(2,7) = 6.70000; last=7; upd(n,b); size(A_new)= 2 nmerge=4, j=2, d_min = D(1,8) = 7.75000; last=8; upd(n,b); size(A_new)= 4 nmerge=5, j=2, d_min = D(2,3) = 11.6500; last=7; size(A_new)= 4 nmerge=6, j=2, d_min = D(1,2) = 18.3750; last=5; size(A_new)= 8 C agnes(n=12, method = 2, ..): 11 merging steps C agnes(n=12, method = 6, ..): |par| = 4, alpha[1:4] = (0.5,0.5,0,-0.5); 11 merging steps C agnes(n=12, method = 3, ..): 11 merging steps C agnes(n=12, method = 6, ..): |par| = 4, alpha[1:4] = (0.5,0.5,0,0.5); 11 merging steps C agnes(n=75, method = 2, ..): 74 merging steps C agnes(n=75, method = 6, ..): |par| = 4, alpha[1:4] = (0.5,0.5,0,-0.5); 74 merging steps C agnes(n=75, method = 3, ..): 74 merging steps C agnes(n=75, method = 6, ..): |par| = 4, alpha[1:4] = (0.5,0.5,0,0.5); 74 merging steps C agnes(n=75, method = 5, ..): 74 merging steps C agnes(n=75, method = 6, ..): 74 merging steps > > showProc.time() Time elapsed: 0.038 0.001 0.039 > > ## an invalid "flexible" case - now must give error early: > x <- rbind(c( -6, -9), c( 0, 13), + c(-15, 6), c(-14, 0), c(12,-10)) > (dx <- daisy(x, "manhattan")) Dissimilarities : 1 2 3 4 2 28 3 
24 22 4 17 27 7 5 19 35 43 36 Metric : manhattan Number of objects : 5 > a.x <- tryCatch(agnes(dx, method="flexible", par = -.2), + error = function(e)e) > ## agnes(method=6, par.method=*) lead to invalid merge; step 4, D(.,.)=-26.1216 > if(!inherits(a.x, "error")) stop("invalid 'par' in \"flexible\" did not give error") > if(!all(vapply(c("par[.]method", "merge"), grepl, NA, x=a.x$message))) + stop("error message did not contain expected words") > > > proc.time() user system elapsed 0.243 0.013 0.311 cluster/tests/pam.R0000644000175100001440000001472012463225266014027 0ustar hornikuserslibrary(cluster) ## Compare on these: nms <- c("clustering", "objective", "isolation", "clusinfo", "silinfo") nm2 <- c("medoids", "id.med", nms) nm3 <- nm2[- pmatch("obj", nm2)] (x <- x0 <- cbind(V1 = (-3:4)^2, V2 = c(0:6,NA), V3 = c(1,2,NA,7,NA,8:9,8))) (px <- pam(x,2, metric="manhattan")) stopifnot(identical(x,x0))# DUP=FALSE .. pd <- pam(dist(x,"manhattan"), 2) px2 <- pam(x,2, metric="manhattan", keep.diss=FALSE, keep.data=FALSE) pdC <- pam(x,2, metric="manhattan", cluster.only = TRUE) p1 <- pam(x,1, metric="manhattan") stopifnot(identical(px[nms], pd[nms]), identical(px[nms], px2[nms]), identical(pdC, px2$clustering), ## and for default dist "euclidean": identical(pam(x, 2)[nms], pam(dist(x),2)[nms]), identical(p1[c("id.med", "objective", "clusinfo")], list(id.med = 6L, objective = c(build=9.25, swap=9.25), clusinfo = array(c(8, 18, 9.25, 45, 0), dim = c(1, 5), dimnames=list(NULL, c("size", "max_diss", "av_diss", "diameter", "separation"))))), p1$clustering == 1, is.null(p1$silinfo) ) set.seed(253) ## generate 250 objects, divided into 2 clusters. 
x <- rbind(cbind(rnorm(120, 0,8), rnorm(120, 0,8)), cbind(rnorm(130,50,8), rnorm(130,10,8))) .proctime00 <- proc.time() summary(px2 <- pam(x, 2)) pdx <- pam(dist(x), 2) all.equal(px2[nms], pdx[nms], tol = 1e-12) ## TRUE pdxK <- pam(dist(x), 2, keep.diss = TRUE) stopifnot(identical(pdx[nm2], pdxK[nm2])) spdx <- silhouette(pdx) summary(spdx) spdx postscript("pam-tst.ps") if(FALSE) plot(spdx)# the silhouette ## is now identical : plot(pdx)# failed in 1.7.0 -- now only does silhouette par(mfrow = 2:1) ## new 'dist' argument for clusplot(): plot(pdx, dist=dist(x)) ## but this should work automagically (via eval()) as well: plot(pdx) ## or this clusplot(pdx) data(ruspini) summary(pr4 <- pam(ruspini, 4)) (pr3 <- pam(ruspini, 3)) (pr5 <- pam(ruspini, 5)) data(votes.repub) summary(pv3 <- pam(votes.repub, 3)) (pv4 <- pam(votes.repub, 4)) (pv6 <- pam(votes.repub, 6, trace = 3)) cat('Time elapsed: ', proc.time() - .proctime00,'\n') ## re-starting with medoids from pv6 shouldn't change: pv6. <- pam(votes.repub, 6, medoids = pv6$id.med, trace = 3) identical(pv6[nm3], pv6.[nm3]) ## This example seg.faulted at some point: d.st <- data.frame(V1= c(9, 12, 12, 15, 9, 9, 13, 11, 15, 10, 13, 13, 13, 15, 8, 13, 13, 10, 7, 9, 6, 11, 3), V2= c(5, 9, 3, 5, 1, 1, 2, NA, 10, 1, 4, 7, 4, NA, NA, 5, 2, 4, 3, 3, 6, 1, 1), V3 = c(63, 41, 59, 50, 290, 226, 60, 36, 32, 121, 70, 51, 79, 32, 42, 39, 76, 60, 56, 88, 57, 309, 254), V4 = c(146, 43, 78, 88, 314, 149, 78, NA, 238, 153, 159, 222, 203, NA, NA, 74, 100, 111, 9, 180, 50, 256, 107)) dd <- daisy(d.st, stand = TRUE) (r0 <- pam(dd, 5))# cluster 5 = { 23 } -- on single observation ## pam doing the "daisy" computation internally: r0s <- pam(d.st, 5, stand=TRUE, keep.diss=FALSE, keep.data=FALSE) (ii <- which(names(r0) %in% c("call","medoids"))) stopifnot(all.equal(r0[-ii], r0s[-ii], tol=1e-14), identical(r0s$medoids, data.matrix(d.st)[r0$medoids, ])) ## This gave only 3 different medoids -> and seg.fault: (r5 <- pam(dd, 5, medoids = c(1,3,20,2,5), 
trace = 2)) # now "fine" dev.off() ##------------------------ Testing pam() with new "pamonce" argument: ## This is from "next version of Matrix" test-tools-1.R: showSys.time <- function(expr) { ## prepend 'Time' for R CMD Rdiff st <- system.time(expr) writeLines(paste("Time", capture.output(print(st)))) invisible(st) } show2Ratios <- function(...) { stopifnot(length(rgs <- list(...)) == 2, nchar(ns <- names(rgs)) > 0) r <- round(cbind(..1, ..2)[c(1,3),], 3) dimnames(r) <- list(paste("Time ", rownames(r)), ns) r } n <- 1000 ## If not enough cases, all CPU times equals 0. n <- 500 # for now, and automatic testing sd <- 0.5 set.seed(13) n2 <- as.integer(round(n * 1.5)) x <- rbind(cbind(rnorm( n,0,sd), rnorm( n,0,sd)), cbind(rnorm(n2,5,sd), rnorm(n2,5,sd)), cbind(rnorm(n2,7,sd), rnorm(n2,7,sd)), cbind(rnorm(n2,9,sd), rnorm(n2,9,sd))) ## original algorithm st0 <- showSys.time(pamx <- pam(x, 4, trace.lev=2))# 8.157 0.024 8.233 ## bswapPamOnce algorithm st1 <- showSys.time(pamxonce <- pam(x, 4, pamonce=TRUE, trace.lev=2))# 6.122 0.024 6.181 ## bswapPamOnceDistIndice st2 <- showSys.time(pamxonce2 <- pam(x, 4, pamonce = 2, trace.lev=2))# 4.101 0.024 4.151 show2Ratios('2:orig' = st2/st0, '1:orig' = st1/st0) ## only call element is not equal (icall <- which(names(pamx) == "call")) pamx[[icall]] stopifnot(all.equal(pamx [-icall], pamxonce [-icall]), all.equal(pamxonce[-icall], pamxonce2[-icall])) ## Same using specified medoids (med0 <- 1 + round(n* c(0,1, 2.5, 4)))# lynne (~ 2010, AMD Phenom II X4 925) st0 <- showSys.time(pamxst <- pam(x, 4, medoids = med0, trace.lev=2))# 13.071 0.024 13.177 st1 <- showSys.time(pamxoncest <- pam(x, 4, medoids = med0, pamonce=TRUE, trace.lev=2))# 8.503 0.024 8.578 st2 <- showSys.time(pamxonce2st <- pam(x, 4, medoids = med0, pamonce=2, trace.lev=2))# 5.587 0.025 5.647 show2Ratios('2:orig' = st2/st0, '1:orig' = st1/st0) ## only call element is not equal stopifnot(all.equal(pamxst [-icall], pamxoncest [-icall]), all.equal(pamxoncest[-icall], 
pamxonce2st[-icall])) ## Different starting values med0 <- 1:4 # lynne (~ 2010, AMD Phenom II X4 925) st0 <- showSys.time(pamxst <- pam(x, 4, medoids = med0, trace.lev=2))# 13.416 0.023 13.529 st1 <- showSys.time(pamxoncest <- pam(x, 4, medoids = med0, pamonce=TRUE, trace.lev=2))# 8.384 0.024 8.459 st2 <- showSys.time(pamxonce2st <- pam(x, 4, medoids = med0, pamonce=2, trace.lev=2))# 5.455 0.030 5.520 show2Ratios('2:orig' = st2/st0, '1:orig' = st1/st0) ## only call element is not equal stopifnot(all.equal(pamxst [-icall], pamxoncest [-icall]), all.equal(pamxoncest[-icall], pamxonce2st[-icall])) ## Medoid bug --- MM: Fixed, well "0L+ hack", in my pam.q, on 2012-01-31 ## ---------- med0 <- (1:6) st0 <- showSys.time(pamxst <- pam(x, 6, medoids = med0 , trace.lev=2)) stopifnot(identical(med0, 1:6)) med0 <- (1:6) st1 <- showSys.time(pamxst.1 <- pam(x, 6, medoids = med0 , pamonce=1, trace.lev=2)) stopifnot(identical(med0, 1:6)) stopifnot(all.equal(pamxst[-icall], pamxst.1 [-icall])) ## Last Line: cat('Time elapsed: ', proc.time() - .proctime00,'\n') cluster/tests/clara-NAs.R0000644000175100001440000000421110537315624015003 0ustar hornikuserslibrary(cluster) x <- cbind(c(0, -4, -22, -14, 0, NA, -28, 1, 10, -1, 100 + c(13, 0, 2, 4, 7, 8, 1)), c(-5, -14, NA, -35, -30, NA, 7, 2, -18, 13, 47, 64, 48, NA, NA, 44, 65)) x (d <- dist(x,'manhattan')) summary(d, na.rm = TRUE) # max = 270 ## First call with "trace" (seg.fault typically later ...): try( clara(x, k=2, metric="manhattan", sampsize=10, trace = 3) ) ## Originally:already shows the problem: nbest[] = c(0,0,...,0) must be WRONG!! ## Now: gives the proper error message. 
## S-plus 6.1.2 (rel.2 for Linux, 2002) gives ##> cc <- clara(x, k=2, metric="manhattan", samples=2, sampsize=10) ## Problem in .Fortran("clara",: Internal error: data for decrementing ## ref.count didn't point to a valid arena (0x0), while calling subroutine clara ## The large example from clara.R -- made small enough to still provoke ## the "** dysta2() ... OUT" problem {no longer!} x <- matrix(c(0, 3, -4, 62, 1, 3, -7, 45, 36, 46, 45, 54, -10, 51, 49, -5, 13, -6, 49, 52, 57, 39, -1, 55, 68, -3, 51, 11, NA, 9, -3, 50, NA, 58, 9, 52, 12, NA, 47, -12, -6, -9, 5, 30, 38, 54, -5, 39, 50, 50, 54, 43, 7, 64, 55, 4, 0, 72, 54, 37, 59, -1, 8, 43, 50, -2, 56, -8, 43, 6, 4, 48, -2, 14, 45, 49, 56, 51, 45, 11, 10, 42, 50, 2, -12, 3, 1, 2, 2, -14, -4, 8, 0, 3, -11, 8, 5, 14, -1, 9, 0, 19, 10, -2, -9, 9, 2, 16, 10, 4, 1, 12, 7, -4, 27, -8, -9, -9, 2, 8, NA, 13, -23, -3, -5, 1, 15, -3, 5, -9, -5, 14, 8, 7, -4, 26, 20, 10, 8, 17, 4, 14, 23, -2, 23, 2, 16, 5, 5, -3, 12, 5, 14, -2, 4, 2, -2, 7, 9, 1, -15, -1, 9, 23, 1, 7, 13, 2, -11, 16, 12, -11, -14, 2, 6, -8), ncol = 2) str(x) # 88 x 2 try(clara(x, 2, samples = 20, trace = 3))# 2nd sample did show dysta2() problem ## To see error message for > 1 missing: try(clara(rbind(NA,x), 2)) x <- x[-33,] ## still had the ** dysta2() .. OUT" problem {no longer!} clara(x, 2, samples = 12, trace = 3) data(xclara) set.seed(123) xclara[sample(nrow(xclara), 50),] <- NA try( clara(xclara, k = 3) ) #-> "nice" error message depicting first 12 missing obs cluster/tests/clara.R0000644000175100001440000000644011466450711014331 0ustar hornikuserslibrary(cluster) ## generate 1500 objects, divided into 2 clusters. 
set.seed(144) x <- rbind(cbind(rnorm(700, 0,8), rnorm(700, 0,8)), cbind(rnorm(800,50,8), rnorm(800,10,8))) isEq <- function(x,y, epsF = 100) is.logical(r <- all.equal(x,y, tol = epsF * .Machine$double.eps)) && r .proctime00 <- proc.time() ## full size sample {should be = pam()}: n0 <- length(iSml <- c(1:70, 701:720)) summary(clara0 <- clara(x[iSml,], k = 2, sampsize = n0)) pam0 <- pam (x[iSml,], k = 2) stopifnot(identical(clara0$clustering, pam0$clustering) , isEq(clara0$objective, unname(pam0$objective[2])) ) summary(clara2 <- clara(x, 2)) clInd <- c("objective", "i.med", "medoids", "clusinfo") clInS <- c(clInd, "sample") ## clara() {as original code} always draws the *same* random samples !!!! clara(x, 2, samples = 50)[clInd] for(i in 1:20) print(clara(x[sample(nrow(x)),], 2, samples = 50)[clInd]) clara(x, 2, samples = 101)[clInd] clara(x, 2, samples = 149)[clInd] clara(x, 2, samples = 200)[clInd] ## Note that this last one is practically identical to the slower pam() one (ii <- sample(length(x), 20)) ## This was bogous (and lead to seg.faults); now properly gives error. ## but for these, now see ./clara-NAs.R if(FALSE) { ## ~~~~~~~~~~~~~ x[ii] <- NA try( clara(x, 2, samples = 50) ) } ###-- Larger example: 2000 objects, divided into 5 clusters. x5 <- rbind(cbind(rnorm(400, 0,4), rnorm(400, 0,4)), cbind(rnorm(400,10,8), rnorm(400,40,6)), cbind(rnorm(400,30,4), rnorm(400, 0,4)), cbind(rnorm(400,40,4), rnorm(400,20,2)), cbind(rnorm(400,50,4), rnorm(400,50,4))) ## plus 1 random dimension x5 <- cbind(x5, rnorm(nrow(x5))) clara(x5, 5) summary(clara(x5, 5, samples = 50)) ## 3 "half" samples: clara(x5, 5, samples = 999) clara(x5, 5, samples = 1000) clara(x5, 5, samples = 1001) clara(x5, 5, samples = 2000)#full sample ###--- Start a version of example(clara) ------- ## xclara : artificial data with 3 clusters of 1000 bivariate objects each. 
data(xclara) (clx3 <- clara(xclara, 3)) ## Plot similar to Figure 5 in Struyf et al (1996) plot(clx3) ## The rngR = TRUE case is currently in the non-strict tests ## ./clara-ex.R ## ~~~~~~~~~~~~ ###--- End version of example(clara) ------- ## small example(s): data(ruspini) clara(ruspini,4) rus <- data.matrix(ruspini); storage.mode(rus) <- "double" ru2 <- rus[c(1:7,21:28, 45:51, 61:69),] ru3 <- rus[c(1:4,21:25, 45:48, 61:63),] ru4 <- rus[c(1:2,21:22, 45:47),] ru5 <- rus[c(1:2,21, 45),] daisy(ru5, "manhattan") ## Dissimilarities : 11 118 143 107 132 89 ## no problem anymore, since 2002-12-28: ## sampsize >= k+1 is now enforced: ## clara(ru5, k=3, met="manhattan", sampsize=3,trace=2)[clInS] clara(ru5, k=3, met="manhattan", sampsize=4,trace=1)[clInS] daisy(ru4, "manhattan") ## this one (k=3) gave problems, from ss = 6 on ___ still after 2002-12-28 ___ : for(ss in 4:nrow(ru4)){ cat("---\n\nsample size = ",ss,"\n") print(clara(ru4,k=3,met="manhattan",sampsize=ss)[clInS]) } for(ss in 5:nrow(ru3)){ cat("---\n\nsample size = ",ss,"\n") print(clara(ru3,k=4,met="manhattan",sampsize=ss)[clInS]) } ## Last Line: cat('Time elapsed: ', proc.time() - .proctime00,'\n') ## Lynne (P IV, 1.6 GHz): 18.81; then (no NA; R 1.9.0-alpha): 15.07 ## nb-mm (P III,700 MHz): 27.97 cluster/tests/fanny-ex.R0000644000175100001440000000557311646600064015000 0ustar hornikusers.libPaths() # show full library tree {also as check of R CMD check!} library(cluster) ####---------- Tests for FANNY i.e., fanny() -------------------------- #### ### -- thanks to ../.Rbuildignore , the output of this is ### -- only compared to saved values for the maintainer ###--- An extension of example(fanny) : ------------------- set.seed(21) ## generate 10+15 objects in two clusters, plus 3 objects lying ## between those clusters. 
x <- rbind(cbind(rnorm(10, 0, 0.5), rnorm(10, 0, 0.5)), cbind(rnorm(15, 5, 0.5), rnorm(15, 5, 0.5)), cbind(rnorm( 3,3.2,0.5), rnorm( 3,3.2,0.5))) .proctime00 <- proc.time() (fannyx <- fanny(x, 2)) summary(fannyx) str(fannyx) ## Different platforms differ (even gcc 3.0.1 vs 3.2 on same platform)! ## {70 or 71 iterations} ## ==> No "fanny-ex.Rout.save" is distributed ! ## -------------------------------------------- summary(fanny(x,3))# one extra cluster (fanny(x,2, memb.exp = 1.5)) (fanny(x,2, memb.exp = 1.2)) (fanny(x,2, memb.exp = 1.1)) (fanny(x,2, memb.exp = 3)) data(ruspini) # < to run under R 1.9.1 summary(fanny(ruspini, 3), digits = 9) summary(fanny(ruspini, 4), digits = 9)# 'correct' #{clusters} summary(fanny(ruspini, 5), digits = 9) cat('Time elapsed: ', proc.time() - .proctime00,'\n') data(chorSub) p4cl <- pam(chorSub, k = 4, cluster.only = TRUE) ## The first two are "completely fuzzy" -- and now give a warnings f4.20 <- fanny(chorSub, k = 4, trace.lev = 1) ; f4.20$coef f4.18 <- fanny(chorSub, k = 4, memb.exp = 1.8) # same problem f4.18. <- fanny(chorSub, k = 4, memb.exp = 1.8, iniMem.p = f4.20$membership) # very quick convergence stopifnot(all.equal(f4.18[-c(7,9)], f4.18.[-c(7,9)], tol = 5e-7)) f4.16 <- fanny(chorSub, k = 4, memb.exp = 1.6) # now gives 4 crisp clusters f4.16. <- fanny(chorSub, k = 4, memb.exp = 1.6, iniMem.p = f4.18$membership, trace.lev = 2)# "converges" immediately - WRONGLY! 
f4.16.2 <- fanny(chorSub, k = 4, memb.exp = 1.6, iniMem.p = cluster:::as.membership(p4cl), tol = 1e-10, trace.lev = 2)## looks much better: stopifnot(f4.16$clustering == f4.16.2$clustering, all.equal(f4.16[-c(1,7,9)], f4.16.2[-c(1,7,9)], tol = 1e-7), all.equal(f4.16$membership, f4.16.2$membership, tol = 0.001)) ## the memberships are quite close but have only converged to precision 0.000228 f4.14 <- fanny(chorSub, k = 4, memb.exp = 1.4) f4.12 <- fanny(chorSub, k = 4, memb.exp = 1.2) table(f4.12$clustering, f4.14$clustering)# close but different table(f4.16$clustering, f4.14$clustering)# dito table(f4.12$clustering, f4.16$clustering)# hence differ even more symnum(cbind(f4.16$membership, 1, f4.12$membership), cutpoints= c(0., 0.2, 0.6, 0.8, 0.9, 0.95, 1 -1e-7, 1 +1e-7), symbols = c(" ", ".", ",", "+", "*", "B","1")) ## Last Line: cat('Time elapsed: ', proc.time() - .proctime00,'\n') cluster/tests/daisy-ex.R0000644000175100001440000000732311646600064014771 0ustar hornikusers## For different cluster versions require(cluster) if(interactive()) { (pkgPath <- .find.package("cluster", verbose = TRUE)) (verC <- readLines(Dfile <- file.path(pkgPath, "DESCRIPTION"), n = 2)[2]) } ## trivial cases should 'work': daisy(cbind(1)) (d10 <- daisy(matrix(0., 1,0))); str(d10) d01 <- daisy(matrix(0., 0,1)) if(paste(R.version$major, R.version$minor, sep=".") >= "2.1.0") print(d01) str(d01) d32 <- data.frame(eins=c("A"=1,"B"=1,"C"=1), zwei=c(2,2,2)) daisy(d32) daisy(d32, stand = TRUE) daisy(d32, type = list(ordratio="zwei")) str(d5 <- data.frame(a= c(0, 0, 0,1,0,0, 0,0,1, 0,NA), b= c(NA,0, 1,1,0,1, 0,1,0, 1,0), c= c(0, 1, 1,0,1,NA,1,0,1, 0,NA), d= c(1, 1, 0,1,0,0, 0,0,0, 1,0), e= c(1, NA,0,1,0,0, 0,0,NA,1,1))) (d0 <- daisy(d5)) (d1 <- daisy(d5, type = list(asymm = 1:5))) (d2 <- daisy(d5, type = list(symm = 1:2, asymm= 3:5))) (d2.<- daisy(d5, type = list( asymm= 3:5))) stopifnot(identical(c(d2), c(d2.))) (dS <- daisy(d5, stand = TRUE))# gave error in some versions 
stopifnot(all.equal(as.vector(summary(c(dS), digits=9)), c(0, 2.6142638, 3.4938562, 3.2933687, 4.0591077, 5.5580177), tol = 1e-7))# 7.88e-9 d5[,4] <- 1 # binary with only one instead of two values (d0 <- daisy(d5)) (d1 <- daisy(d5, type = list(asymm = 1:5)))# 2 NAs (d2 <- daisy(d5, type = list(symm = 1:2, asymm= 3:5))) (d2.<- daisy(d5, type = list( asymm= 3:5))) ## better leave away the constant variable: it has no effect: stopifnot(identical(c(d1), c(daisy(d5[,-4], type = list(asymm = 1:4))))) ###---- Trivial "binary only" matrices (not data frames) did fail: x <- matrix(0, 2, 2) dimnames(x)[[2]] <- c("A", "B")## colnames<- is missing in S+ daisy(x, type = list(symm= "B", asymm="A")) daisy(x, type = list(symm= "B"))# 0 too x2 <- x; x2[2,2] <- 1 daisy(x2, type= list(symm = "B"))# |-> 0.5 (gives 1 in S+) daisy(x2, type= list(symm = "B", asymm="A"))# 1 x3 <- x; x3[] <- diag(2) daisy(x3) # warning: both as interval scaled -> sqrt(2) daisy(x3, type= list(symm="B", asymm="A"))# 1 daisy(x3, type= list(symm =c("B","A"))) # 1, S+: sqrt(2) daisy(x3, type= list(asymm=c("B","A"))) # 1, S+ : sqrt(2) x4 <- rbind(x3, 1) daisy(x4, type= list(symm="B", asymm="A"))# 1 0.5 0.5 daisy(x4, type= list(symm=c("B","A"))) # dito; S+ : 1.41 1 1 daisy(x4, type= list(asymm=c("A","B"))) # dito, dito ## ----------- example(daisy) ----------------------- data(flower) data(agriculture) ## Example 1 in ref: ## Dissimilarities using Euclidean metric and without standardization (d.agr <- daisy(agriculture, metric = "euclidean", stand = FALSE)) (d.agr2 <- daisy(agriculture, metric = "manhattan")) ## Example 2 in ref (dfl0 <- daisy(flower)) stopifnot(identical(c(dfl0), c(daisy(flower, type = list(symm = 1)))) && identical(c(dfl0), c(daisy(flower, type = list(symm = 2)))) && identical(c(dfl0), c(daisy(flower, type = list(symm = 3)))) && identical(c(dfl0), c(daisy(flower, type = list(symm = c(1,3))))) ) (dfl1 <- daisy(flower, type = list(asymm = 3))) (dfl2 <- daisy(flower, type = list(asymm = c(1, 3), 
ordratio = 7))) (dfl3 <- daisy(flower, type = list(asymm = 1:3))) ## --- animals data(animals) d0 <- daisy(animals) d1 <- daisy(animals - 1, type=list(asymm=c(2,4))) (d2 <- daisy(animals - 1, type=list(symm = c(1,3,5,6), asymm=c(2,4)))) stopifnot(c(d1) == c(d2)) d3 <- daisy(2 - animals, type=list(asymm=c(2,4))) (d4 <- daisy(2 - animals, type=list(symm = c(1,3,5,6), asymm=c(2,4)))) stopifnot(c(d3) == c(d4)) pairs(cbind(d0,d2,d4), main = "Animals -- symmetric and asymm. dissimilarities") cluster/tests/pam.Rout.save0000644000175100001440000014650212463225266015520 0ustar hornikusers R version 3.1.2 Patched (2015-01-31 r67684) -- "Pumpkin Helmet" Copyright (C) 2015 The R Foundation for Statistical Computing Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(cluster) > ## Compare on these: > nms <- c("clustering", "objective", "isolation", "clusinfo", "silinfo") > nm2 <- c("medoids", "id.med", nms) > nm3 <- nm2[- pmatch("obj", nm2)] > > (x <- x0 <- cbind(V1 = (-3:4)^2, V2 = c(0:6,NA), V3 = c(1,2,NA,7,NA,8:9,8))) V1 V2 V3 [1,] 9 0 1 [2,] 4 1 2 [3,] 1 2 NA [4,] 0 3 7 [5,] 1 4 NA [6,] 4 5 8 [7,] 9 6 9 [8,] 16 NA 8 > (px <- pam(x,2, metric="manhattan")) Medoids: ID V1 V2 V3 [1,] 2 4 1 2 [2,] 6 4 5 8 Clustering vector: [1] 1 1 1 2 2 2 2 2 Objective function: build swap 6.375 6.375 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > stopifnot(identical(x,x0))# DUP=FALSE .. 
> pd <- pam(dist(x,"manhattan"), 2) > px2 <- pam(x,2, metric="manhattan", keep.diss=FALSE, keep.data=FALSE) > pdC <- pam(x,2, metric="manhattan", cluster.only = TRUE) > p1 <- pam(x,1, metric="manhattan") > > stopifnot(identical(px[nms], pd[nms]), + identical(px[nms], px2[nms]), + identical(pdC, px2$clustering), + ## and for default dist "euclidean": + identical(pam(x, 2)[nms], + pam(dist(x),2)[nms]), + identical(p1[c("id.med", "objective", "clusinfo")], + list(id.med = 6L, objective = c(build=9.25, swap=9.25), + clusinfo = array(c(8, 18, 9.25, 45, 0), dim = c(1, 5), + dimnames=list(NULL, c("size", "max_diss", "av_diss", + "diameter", "separation"))))), + p1$clustering == 1, is.null(p1$silinfo) + ) > > set.seed(253) > ## generate 250 objects, divided into 2 clusters. > x <- rbind(cbind(rnorm(120, 0,8), rnorm(120, 0,8)), + cbind(rnorm(130,50,8), rnorm(130,10,8))) > > .proctime00 <- proc.time() > > summary(px2 <- pam(x, 2)) Medoids: ID [1,] 61 -0.7697828 -0.2330187 [2,] 163 49.1392167 9.4097259 Clustering vector: [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 [112] 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [149] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [186] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 [223] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 Objective function: build swap 13.25843 10.20817 Numerical information per cluster: size max_diss av_diss diameter separation [1,] 120 31.04843 10.18584 53.22082 9.419035 [2,] 130 26.94337 10.22878 47.86442 9.419035 Isolated clusters: L-clusters: character(0) L*-clusters: character(0) Silhouette plot information: cluster neighbor sil_width 117 1 2 0.80638966 75 1 2 0.80600824 81 1 2 0.80556624 107 1 2 0.80535252 6 1 2 0.80526675 
100 1 2 0.80385505 68 1 2 0.80369702 113 1 2 0.80331774 61 1 2 0.80315322 57 1 2 0.80313945 12 1 2 0.80161573 59 1 2 0.80047745 82 1 2 0.79630964 67 1 2 0.79559589 63 1 2 0.79488886 47 1 2 0.79458809 21 1 2 0.79379540 9 1 2 0.79343081 95 1 2 0.79332153 4 1 2 0.79136081 3 1 2 0.79130879 39 1 2 0.79052367 120 1 2 0.78877423 90 1 2 0.78767224 85 1 2 0.78588359 106 1 2 0.78504452 92 1 2 0.78303000 83 1 2 0.78245915 19 1 2 0.78228359 14 1 2 0.78139236 10 1 2 0.77825678 49 1 2 0.77597087 64 1 2 0.77482761 44 1 2 0.77397394 89 1 2 0.77297318 119 1 2 0.77238705 108 1 2 0.77137189 104 1 2 0.76871378 32 1 2 0.76856251 115 1 2 0.76843312 27 1 2 0.76811698 88 1 2 0.76810713 109 1 2 0.76681303 62 1 2 0.76655954 36 1 2 0.76547988 66 1 2 0.76535606 74 1 2 0.76491406 26 1 2 0.76441455 24 1 2 0.76436188 65 1 2 0.76381352 40 1 2 0.76061109 52 1 2 0.75748679 54 1 2 0.75746436 13 1 2 0.75594073 56 1 2 0.75353784 96 1 2 0.75268786 116 1 2 0.75267215 110 1 2 0.75266614 112 1 2 0.75150872 78 1 2 0.75083708 7 1 2 0.74905187 86 1 2 0.74190424 18 1 2 0.74162144 111 1 2 0.74085474 69 1 2 0.74044653 76 1 2 0.73911707 50 1 2 0.73847075 93 1 2 0.73616384 31 1 2 0.73462007 33 1 2 0.73455252 43 1 2 0.73396232 102 1 2 0.72930751 118 1 2 0.72778023 15 1 2 0.72588122 53 1 2 0.72542363 8 1 2 0.72535191 77 1 2 0.72467809 16 1 2 0.72446952 48 1 2 0.72331213 105 1 2 0.72325095 37 1 2 0.72055248 101 1 2 0.71783562 22 1 2 0.71217552 23 1 2 0.71078375 84 1 2 0.70573352 17 1 2 0.70221946 38 1 2 0.69947240 2 1 2 0.69718780 98 1 2 0.69601237 1 1 2 0.69373841 35 1 2 0.69179546 70 1 2 0.69074915 28 1 2 0.68434091 97 1 2 0.68351978 5 1 2 0.67662675 72 1 2 0.67420722 34 1 2 0.67315267 11 1 2 0.67226046 103 1 2 0.67188668 87 1 2 0.67172802 58 1 2 0.67090513 46 1 2 0.66835116 60 1 2 0.66565445 80 1 2 0.65983842 73 1 2 0.65093947 55 1 2 0.64709226 20 1 2 0.64439401 45 1 2 0.63403361 51 1 2 0.63303101 42 1 2 0.62906268 94 1 2 0.60916406 91 1 2 0.59905996 41 1 2 0.57245485 29 1 2 0.55594781 99 1 2 0.55035955 79 1 2 
0.50808544 71 1 2 0.46663954 25 1 2 0.43797346 114 1 2 0.16645003 30 1 2 0.08928664 121 2 1 0.80353953 137 2 1 0.80253721 146 2 1 0.80106653 173 2 1 0.80039417 216 2 1 0.79969919 124 2 1 0.79964913 163 2 1 0.79901674 157 2 1 0.79779188 242 2 1 0.79744315 227 2 1 0.79708130 207 2 1 0.79653829 130 2 1 0.79574204 188 2 1 0.79496670 250 2 1 0.79302877 145 2 1 0.79190501 126 2 1 0.79156003 166 2 1 0.79068795 222 2 1 0.78986170 232 2 1 0.78839216 176 2 1 0.78819086 198 2 1 0.78782877 225 2 1 0.78747329 230 2 1 0.78689375 205 2 1 0.78683641 160 2 1 0.78643596 150 2 1 0.78484046 136 2 1 0.78455577 228 2 1 0.78198238 206 2 1 0.78137390 152 2 1 0.78044944 200 2 1 0.77843458 149 2 1 0.77822272 221 2 1 0.77758324 226 2 1 0.77611981 129 2 1 0.77531368 199 2 1 0.77491451 154 2 1 0.77136276 241 2 1 0.77076783 179 2 1 0.77010597 174 2 1 0.76893758 214 2 1 0.76776510 181 2 1 0.76763087 213 2 1 0.76683151 215 2 1 0.76639087 236 2 1 0.76637552 218 2 1 0.76563050 182 2 1 0.76450873 219 2 1 0.76370712 208 2 1 0.76090426 151 2 1 0.75957536 164 2 1 0.75914844 248 2 1 0.75849775 224 2 1 0.75826151 168 2 1 0.75782023 189 2 1 0.75555083 128 2 1 0.75550519 125 2 1 0.75510766 177 2 1 0.75128941 147 2 1 0.75086382 158 2 1 0.75029192 245 2 1 0.74993652 186 2 1 0.74741247 165 2 1 0.74681005 156 2 1 0.74478894 122 2 1 0.74315425 247 2 1 0.74107328 220 2 1 0.74054057 183 2 1 0.73818743 184 2 1 0.73743259 169 2 1 0.73712431 180 2 1 0.73419669 240 2 1 0.73390938 134 2 1 0.73382823 190 2 1 0.73379720 217 2 1 0.73311931 171 2 1 0.73110365 143 2 1 0.72986022 153 2 1 0.72891371 223 2 1 0.72887340 238 2 1 0.72789416 175 2 1 0.72311665 138 2 1 0.72290131 235 2 1 0.72157157 237 2 1 0.71591233 132 2 1 0.71549875 204 2 1 0.71381083 201 2 1 0.71263881 170 2 1 0.70812568 191 2 1 0.70747428 243 2 1 0.70588929 193 2 1 0.70499170 141 2 1 0.70489885 161 2 1 0.70303117 249 2 1 0.69300988 229 2 1 0.69231982 196 2 1 0.69162302 211 2 1 0.69128644 246 2 1 0.68757678 159 2 1 0.68619850 133 2 1 0.68605444 194 2 1 
0.68538064 155 2 1 0.68278455 234 2 1 0.68202095 127 2 1 0.68111027 144 2 1 0.67559517 131 2 1 0.65959281 139 2 1 0.65895024 209 2 1 0.65844942 148 2 1 0.65180336 185 2 1 0.64989675 212 2 1 0.63954685 192 2 1 0.63470144 123 2 1 0.63005333 202 2 1 0.61735843 135 2 1 0.61493992 210 2 1 0.60680456 140 2 1 0.58410004 187 2 1 0.58193543 239 2 1 0.57088679 203 2 1 0.56761998 244 2 1 0.55321123 231 2 1 0.55043439 197 2 1 0.52364031 195 2 1 0.51955678 142 2 1 0.47466260 162 2 1 0.46155841 172 2 1 0.45167576 178 2 1 0.42686872 233 2 1 0.37013099 167 2 1 0.32442373 Average silhouette width per cluster: [1] 0.7196104 0.7148520 Average silhouette width of total data set: [1] 0.717136 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > pdx <- pam(dist(x), 2) > all.equal(px2[nms], pdx[nms], tol = 1e-12) ## TRUE [1] TRUE > pdxK <- pam(dist(x), 2, keep.diss = TRUE) > stopifnot(identical(pdx[nm2], pdxK[nm2])) > > spdx <- silhouette(pdx) > summary(spdx) Silhouette of 250 units in 2 clusters from pam(x = dist(x), k = 2) : Cluster sizes and average silhouette widths: 120 130 0.7196104 0.7148520 Individual silhouette widths: Min. 1st Qu. Median Mean 3rd Qu. Max. 
0.08929 0.69140 0.74400 0.71710 0.77810 0.80640 > spdx cluster neighbor sil_width 117 1 2 0.80638966 75 1 2 0.80600824 81 1 2 0.80556624 107 1 2 0.80535252 6 1 2 0.80526675 100 1 2 0.80385505 68 1 2 0.80369702 113 1 2 0.80331774 61 1 2 0.80315322 57 1 2 0.80313945 12 1 2 0.80161573 59 1 2 0.80047745 82 1 2 0.79630964 67 1 2 0.79559589 63 1 2 0.79488886 47 1 2 0.79458809 21 1 2 0.79379540 9 1 2 0.79343081 95 1 2 0.79332153 4 1 2 0.79136081 3 1 2 0.79130879 39 1 2 0.79052367 120 1 2 0.78877423 90 1 2 0.78767224 85 1 2 0.78588359 106 1 2 0.78504452 92 1 2 0.78303000 83 1 2 0.78245915 19 1 2 0.78228359 14 1 2 0.78139236 10 1 2 0.77825678 49 1 2 0.77597087 64 1 2 0.77482761 44 1 2 0.77397394 89 1 2 0.77297318 119 1 2 0.77238705 108 1 2 0.77137189 104 1 2 0.76871378 32 1 2 0.76856251 115 1 2 0.76843312 27 1 2 0.76811698 88 1 2 0.76810713 109 1 2 0.76681303 62 1 2 0.76655954 36 1 2 0.76547988 66 1 2 0.76535606 74 1 2 0.76491406 26 1 2 0.76441455 24 1 2 0.76436188 65 1 2 0.76381352 40 1 2 0.76061109 52 1 2 0.75748679 54 1 2 0.75746436 13 1 2 0.75594073 56 1 2 0.75353784 96 1 2 0.75268786 116 1 2 0.75267215 110 1 2 0.75266614 112 1 2 0.75150872 78 1 2 0.75083708 7 1 2 0.74905187 86 1 2 0.74190424 18 1 2 0.74162144 111 1 2 0.74085474 69 1 2 0.74044653 76 1 2 0.73911707 50 1 2 0.73847075 93 1 2 0.73616384 31 1 2 0.73462007 33 1 2 0.73455252 43 1 2 0.73396232 102 1 2 0.72930751 118 1 2 0.72778023 15 1 2 0.72588122 53 1 2 0.72542363 8 1 2 0.72535191 77 1 2 0.72467809 16 1 2 0.72446952 48 1 2 0.72331213 105 1 2 0.72325095 37 1 2 0.72055248 101 1 2 0.71783562 22 1 2 0.71217552 23 1 2 0.71078375 84 1 2 0.70573352 17 1 2 0.70221946 38 1 2 0.69947240 2 1 2 0.69718780 98 1 2 0.69601237 1 1 2 0.69373841 35 1 2 0.69179546 70 1 2 0.69074915 28 1 2 0.68434091 97 1 2 0.68351978 5 1 2 0.67662675 72 1 2 0.67420722 34 1 2 0.67315267 11 1 2 0.67226046 103 1 2 0.67188668 87 1 2 0.67172802 58 1 2 0.67090513 46 1 2 0.66835116 60 1 2 0.66565445 80 1 2 0.65983842 73 1 2 0.65093947 55 1 2 
0.64709226 20 1 2 0.64439401 45 1 2 0.63403361 51 1 2 0.63303101 42 1 2 0.62906268 94 1 2 0.60916406 91 1 2 0.59905996 41 1 2 0.57245485 29 1 2 0.55594781 99 1 2 0.55035955 79 1 2 0.50808544 71 1 2 0.46663954 25 1 2 0.43797346 114 1 2 0.16645003 30 1 2 0.08928664 121 2 1 0.80353953 137 2 1 0.80253721 146 2 1 0.80106653 173 2 1 0.80039417 216 2 1 0.79969919 124 2 1 0.79964913 163 2 1 0.79901674 157 2 1 0.79779188 242 2 1 0.79744315 227 2 1 0.79708130 207 2 1 0.79653829 130 2 1 0.79574204 188 2 1 0.79496670 250 2 1 0.79302877 145 2 1 0.79190501 126 2 1 0.79156003 166 2 1 0.79068795 222 2 1 0.78986170 232 2 1 0.78839216 176 2 1 0.78819086 198 2 1 0.78782877 225 2 1 0.78747329 230 2 1 0.78689375 205 2 1 0.78683641 160 2 1 0.78643596 150 2 1 0.78484046 136 2 1 0.78455577 228 2 1 0.78198238 206 2 1 0.78137390 152 2 1 0.78044944 200 2 1 0.77843458 149 2 1 0.77822272 221 2 1 0.77758324 226 2 1 0.77611981 129 2 1 0.77531368 199 2 1 0.77491451 154 2 1 0.77136276 241 2 1 0.77076783 179 2 1 0.77010597 174 2 1 0.76893758 214 2 1 0.76776510 181 2 1 0.76763087 213 2 1 0.76683151 215 2 1 0.76639087 236 2 1 0.76637552 218 2 1 0.76563050 182 2 1 0.76450873 219 2 1 0.76370712 208 2 1 0.76090426 151 2 1 0.75957536 164 2 1 0.75914844 248 2 1 0.75849775 224 2 1 0.75826151 168 2 1 0.75782023 189 2 1 0.75555083 128 2 1 0.75550519 125 2 1 0.75510766 177 2 1 0.75128941 147 2 1 0.75086382 158 2 1 0.75029192 245 2 1 0.74993652 186 2 1 0.74741247 165 2 1 0.74681005 156 2 1 0.74478894 122 2 1 0.74315425 247 2 1 0.74107328 220 2 1 0.74054057 183 2 1 0.73818743 184 2 1 0.73743259 169 2 1 0.73712431 180 2 1 0.73419669 240 2 1 0.73390938 134 2 1 0.73382823 190 2 1 0.73379720 217 2 1 0.73311931 171 2 1 0.73110365 143 2 1 0.72986022 153 2 1 0.72891371 223 2 1 0.72887340 238 2 1 0.72789416 175 2 1 0.72311665 138 2 1 0.72290131 235 2 1 0.72157157 237 2 1 0.71591233 132 2 1 0.71549875 204 2 1 0.71381083 201 2 1 0.71263881 170 2 1 0.70812568 191 2 1 0.70747428 243 2 1 0.70588929 193 2 1 0.70499170 141 2 
1 0.70489885 161 2 1 0.70303117 249 2 1 0.69300988 229 2 1 0.69231982 196 2 1 0.69162302 211 2 1 0.69128644 246 2 1 0.68757678 159 2 1 0.68619850 133 2 1 0.68605444 194 2 1 0.68538064 155 2 1 0.68278455 234 2 1 0.68202095 127 2 1 0.68111027 144 2 1 0.67559517 131 2 1 0.65959281 139 2 1 0.65895024 209 2 1 0.65844942 148 2 1 0.65180336 185 2 1 0.64989675 212 2 1 0.63954685 192 2 1 0.63470144 123 2 1 0.63005333 202 2 1 0.61735843 135 2 1 0.61493992 210 2 1 0.60680456 140 2 1 0.58410004 187 2 1 0.58193543 239 2 1 0.57088679 203 2 1 0.56761998 244 2 1 0.55321123 231 2 1 0.55043439 197 2 1 0.52364031 195 2 1 0.51955678 142 2 1 0.47466260 162 2 1 0.46155841 172 2 1 0.45167576 178 2 1 0.42686872 233 2 1 0.37013099 167 2 1 0.32442373 attr(,"Ordered") [1] TRUE attr(,"call") pam(x = dist(x), k = 2) attr(,"class") [1] "silhouette" > postscript("pam-tst.ps") > if(FALSE) + plot(spdx)# the silhouette > ## is now identical : > plot(pdx)# failed in 1.7.0 -- now only does silhouette > > par(mfrow = 2:1) > ## new 'dist' argument for clusplot(): > plot(pdx, dist=dist(x)) > ## but this should work automagically (via eval()) as well: > plot(pdx) > ## or this > clusplot(pdx) > > data(ruspini) > summary(pr4 <- pam(ruspini, 4)) Medoids: ID x y 10 10 19 65 32 32 44 149 52 52 99 119 70 70 69 21 Clustering vector: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 Objective function: build swap 17.22898 11.48637 Numerical information per cluster: size max_diss av_diss diameter separation [1,] 20 24.04163 12.55362 40.24922 40.49691 [2,] 23 26.92582 10.44238 36.61967 24.04163 [3,] 17 33.97058 13.84800 47.63402 24.04163 [4,] 15 17.02939 8.98767 27.07397 40.49691 Isolated clusters: 
L-clusters: character(0) L*-clusters: [1] 1 4 Silhouette plot information: cluster neighbor sil_width 10 1 4 0.8056096 6 1 4 0.7954977 9 1 4 0.7923048 11 1 4 0.7831672 8 1 2 0.7811793 12 1 4 0.7658171 3 1 4 0.7587961 14 1 4 0.7569107 2 1 4 0.7456150 16 1 4 0.7436018 13 1 4 0.7398841 4 1 2 0.7361533 18 1 4 0.7080079 15 1 4 0.7006854 19 1 4 0.7000938 1 1 4 0.6798381 5 1 4 0.6646571 20 1 4 0.6619626 17 1 4 0.6148541 7 1 2 0.5900575 26 2 3 0.8357433 32 2 3 0.8332753 27 2 3 0.8290271 25 2 3 0.8285547 28 2 3 0.8192636 35 2 3 0.8186309 33 2 3 0.8175087 23 2 3 0.8089969 22 2 3 0.8025389 34 2 3 0.8013310 31 2 3 0.7949677 36 2 3 0.7943536 24 2 3 0.7930770 29 2 3 0.7897346 30 2 3 0.7892027 21 2 3 0.7698024 37 2 3 0.7684502 39 2 3 0.7631648 38 2 3 0.7438848 40 2 3 0.7083130 42 2 3 0.5291270 43 2 3 0.4931623 41 2 3 0.4290814 54 3 2 0.7741745 57 3 2 0.7703455 55 3 2 0.7641810 50 3 2 0.7619943 52 3 2 0.7616220 56 3 2 0.7575313 59 3 2 0.7327828 49 3 2 0.7317002 51 3 2 0.7209864 60 3 2 0.7206840 58 3 2 0.7019611 53 3 2 0.6775322 45 3 2 0.5974787 46 3 2 0.5740823 47 3 2 0.4835635 48 3 2 0.4247331 44 3 2 0.4196093 70 4 1 0.8548947 67 4 1 0.8527439 65 4 1 0.8503105 69 4 1 0.8391810 71 4 1 0.8381065 66 4 1 0.8229841 62 4 1 0.8153092 64 4 1 0.8061254 73 4 1 0.7950213 63 4 1 0.7795369 72 4 1 0.7748121 61 4 1 0.7701103 68 4 1 0.7620559 74 4 1 0.7596815 75 4 1 0.7425538 Average silhouette width per cluster: [1] 0.7262347 0.7548344 0.6691154 0.8042285 Average silhouette width of total data set: [1] 0.737657 2775 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 
1.4142 40.1060 75.5910 71.5380 99.1690 154.5000 Metric : euclidean Number of objects : 75 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > (pr3 <- pam(ruspini, 3)) Medoids: ID x y 17 17 30 52 32 32 44 149 52 52 99 119 Clustering vector: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 Objective function: build swap 25.68229 21.59293 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > (pr5 <- pam(ruspini, 5)) Medoids: ID x y 10 10 19 65 32 32 44 149 52 52 99 119 47 47 78 94 70 70 69 21 Clustering vector: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 4 4 4 3 3 3 3 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 3 3 3 3 3 3 3 3 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 Objective function: build swap 12.09864 10.39579 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > > data(votes.repub) > summary(pv3 <- pam(votes.repub, 3)) Medoids: ID X1856 X1860 X1864 X1868 X1872 X1876 X1880 X1884 X1888 X1892 X1896 Alabama 1 NA NA NA 51.44 53.19 40.02 36.98 38.44 32.28 3.95 28.13 Alaska 2 NA NA NA NA NA NA NA NA NA NA NA New Mexico 31 NA NA NA NA NA NA NA NA NA NA NA X1900 X1904 X1908 X1912 X1916 X1920 X1924 X1928 X1932 X1936 X1940 Alabama 34.67 20.65 24.38 8.26 21.97 30.98 27.01 48.49 14.15 12.82 14.34 Alaska NA NA NA NA NA NA NA NA NA NA NA 
New Mexico NA NA NA 35.91 46.53 54.68 48.52 59.01 35.76 36.50 43.28 X1944 X1948 X1952 X1956 X1960 X1964 X1968 X1972 X1976 Alabama 18.20 19.04 35.02 39.39 41.75 69.5 14.0 72.4 43.48 Alaska NA NA NA NA 50.94 34.1 45.3 58.1 62.91 New Mexico 46.44 42.93 55.39 57.81 49.41 41.0 51.8 61.0 51.04 Clustering vector: Alabama Alaska Arizona Arkansas California 1 2 3 1 2 Colorado Connecticut Delaware Florida Georgia 2 2 3 1 1 Hawaii Idaho Illinois Indiana Iowa 2 3 2 3 3 Kansas Kentucky Louisiana Maine Maryland 2 3 1 2 3 Massachusetts Michigan Minnesota Mississippi Missouri 3 2 3 1 3 Montana Nebraska Nevada New Hampshire New Jersey 3 3 2 2 2 New Mexico New York North Carolina North Dakota Ohio 3 3 3 2 3 Oklahoma Oregon Pennsylvania Rhode Island South Carolina 3 3 2 3 2 South Dakota Tennessee Texas Utah Vermont 3 3 2 3 2 Virginia Washington West Virginia Wisconsin Wyoming 2 3 3 3 3 Objective function: build swap 38.32548 38.32548 Numerical information per cluster: size max_diss av_diss diameter separation [1,] 6 78.92731 51.59134 116.7048 50.14189 [2,] 18 86.54675 38.47068 271.2455 19.42184 [3,] 26 60.03879 35.16361 124.8324 19.42184 Isolated clusters: L-clusters: character(0) L*-clusters: character(0) Silhouette plot information: cluster neighbor sil_width Louisiana 1 3 0.54689535 Alabama 1 3 0.52839272 Georgia 1 3 0.52730253 Mississippi 1 2 0.52454810 Florida 1 3 0.25211631 Arkansas 1 3 0.24131701 Alaska 2 3 0.15699268 Hawaii 2 3 0.08479842 Vermont 2 3 -0.02620975 Maine 2 3 -0.03284950 Michigan 2 3 -0.11524982 Pennsylvania 2 3 -0.15341477 New Hampshire 2 3 -0.17099889 Connecticut 2 3 -0.19095000 New Jersey 2 3 -0.19281567 Kansas 2 3 -0.19719316 California 2 3 -0.24006293 Illinois 2 3 -0.25236336 North Dakota 2 3 -0.25464430 Virginia 2 3 -0.26262534 Nevada 2 3 -0.27016336 Colorado 2 3 -0.27885043 Texas 2 1 -0.47297583 South Carolina 2 1 -0.50899710 New Mexico 3 2 0.39555584 Washington 3 2 0.32989454 Oklahoma 3 2 0.30953823 Wyoming 3 2 0.30163169 Idaho 3 2 0.29915132 Montana 3 2 
0.29105494 Missouri 3 2 0.29038462 Oregon 3 2 0.27710695 Maryland 3 2 0.27437520 West Virginia 3 2 0.27089938 Utah 3 2 0.26964380 Tennessee 3 2 0.26846440 Arizona 3 2 0.25968564 Delaware 3 2 0.25920434 Kentucky 3 2 0.25868341 South Dakota 3 2 0.25615670 Indiana 3 2 0.25031548 Wisconsin 3 2 0.21808013 Ohio 3 2 0.21477474 Nebraska 3 2 0.20965953 North Carolina 3 2 0.19201537 Minnesota 3 2 0.18955165 New York 3 2 0.18820394 Iowa 3 2 0.17296046 Rhode Island 3 2 0.12599915 Massachusetts 3 2 0.12106770 Average silhouette width per cluster: [1] 0.4367620 -0.1876985 0.2497715 Average silhouette width of total data set: [1] 0.1147212 1225 dissimilarities, summarized : Min. 1st Qu. Median Mean 3rd Qu. Max. 17.199 48.343 64.681 82.227 105.490 281.950 Metric : euclidean Number of objects : 50 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > (pv4 <- pam(votes.repub, 4)) Medoids: ID X1856 X1860 X1864 X1868 X1872 X1876 X1880 X1884 X1888 X1892 X1896 Alabama 1 NA NA NA 51.44 53.19 40.02 36.98 38.44 32.28 3.95 28.13 Alaska 2 NA NA NA NA NA NA NA NA NA NA NA New Mexico 31 NA NA NA NA NA NA NA NA NA NA NA Iowa 15 49.13 54.87 64.23 61.92 64.18 58.58 56.85 52.42 52.36 49.60 55.46 X1900 X1904 X1908 X1912 X1916 X1920 X1924 X1928 X1932 X1936 X1940 Alabama 34.67 20.65 24.38 8.26 21.97 30.98 27.01 48.49 14.15 12.82 14.34 Alaska NA NA NA NA NA NA NA NA NA NA NA New Mexico NA NA NA 35.91 46.53 54.68 48.52 59.01 35.76 36.50 43.28 Iowa 57.99 63.37 55.62 24.30 54.06 70.91 55.06 61.80 39.98 42.70 52.03 X1944 X1948 X1952 X1956 X1960 X1964 X1968 X1972 X1976 Alabama 18.20 19.04 35.02 39.39 41.75 69.5 14.0 72.4 43.48 Alaska NA NA NA NA 50.94 34.1 45.3 58.1 62.91 New Mexico 46.44 42.93 55.39 57.81 49.41 41.0 51.8 61.0 51.04 Iowa 51.99 47.58 63.76 59.06 56.71 38.1 53.0 57.6 50.51 Clustering vector: Alabama Alaska Arizona Arkansas California 1 2 3 1 2 Colorado Connecticut Delaware Florida Georgia 2 2 3 1 1 Hawaii Idaho 
Illinois Indiana Iowa 2 3 4 3 4 Kansas Kentucky Louisiana Maine Maryland 4 3 1 2 3 Massachusetts Michigan Minnesota Mississippi Missouri 4 2 4 1 3 Montana Nebraska Nevada New Hampshire New Jersey 3 4 2 2 2 New Mexico New York North Carolina North Dakota Ohio 3 3 3 4 4 Oklahoma Oregon Pennsylvania Rhode Island South Carolina 3 3 4 4 2 South Dakota Tennessee Texas Utah Vermont 4 3 2 3 2 Virginia Washington West Virginia Wisconsin Wyoming 2 3 3 4 3 Objective function: build swap 35.84182 35.84182 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > (pv6 <- pam(votes.repub, 6, trace = 3)) C pam(): computing 1226 dissimilarities from 50 x 31 matrix: [Ok] pam()'s bswap(*, s=281.951, pamonce=0): build 6 medoids: new repr. 2 new repr. 1 new repr. 31 new repr. 15 new repr. 46 new repr. 40 after build: medoids are 1 2 15 31 40 46 and min.dist dysma[1:n] are 0 0 37.7 56 35.8 28.5 28.6 31.7 54.1 48.2 51.7 33.2 27.3 30.5 0 35.1 25.4 60.9 36.9 26.7 48.4 28.1 33.2 63.1 21.1 28.6 37.5 35.8 29.8 31.3 0 32 29.9 35.7 30.9 35.1 27.8 35.7 50.2 0 26.2 30.2 45.2 34.1 33.8 0 28.5 35.1 34.2 28.8 swp new 10 <-> 1 old; decreasing diss. 
1579.03 by -2.57067 end{bswap()}, end{cstat()} Medoids: ID X1856 X1860 X1864 X1868 X1872 X1876 X1880 X1884 X1888 X1892 Georgia 10 NA NA NA 35.72 43.77 27.94 34.33 33.84 28.33 21.80 Alaska 2 NA NA NA NA NA NA NA NA NA NA Virginia 46 0.19 1.15 NA NA 50.48 40.62 39.52 48.90 49.47 38.75 New Mexico 31 NA NA NA NA NA NA NA NA NA NA Iowa 15 49.13 54.87 64.23 61.92 64.18 58.58 56.85 52.42 52.36 49.60 South Carolina 40 NA NA NA 57.93 75.95 50.26 33.97 23.72 17.27 18.99 X1896 X1900 X1904 X1908 X1912 X1916 X1920 X1924 X1928 X1932 Georgia 36.82 28.56 18.32 31.40 4.27 7.07 28.57 18.19 43.37 7.77 Alaska NA NA NA NA NA NA NA NA NA NA Virginia 45.90 43.81 36.67 38.36 17.00 32.05 37.85 32.79 53.91 30.09 New Mexico NA NA NA NA 35.91 46.53 54.68 48.52 59.01 35.76 Iowa 55.46 57.99 63.37 55.62 24.30 54.06 70.91 55.06 61.80 39.98 South Carolina 13.51 7.04 4.63 5.97 1.06 2.43 3.90 2.21 8.54 1.89 X1936 X1940 X1944 X1948 X1952 X1956 X1960 X1964 X1968 X1972 Georgia 12.60 14.84 18.25 18.31 30.34 33.22 37.44 54.1 30.4 75.0 Alaska NA NA NA NA NA NA 50.94 34.1 45.3 58.1 Virginia 29.39 31.55 37.39 41.04 56.32 55.37 52.44 46.5 41.4 67.8 New Mexico 36.50 43.28 46.44 42.93 55.39 57.81 49.41 41.0 51.8 61.0 Iowa 42.70 52.03 51.99 47.58 63.76 59.06 56.71 38.1 53.0 57.6 South Carolina 1.43 4.37 4.46 3.78 49.28 25.18 48.76 58.9 38.1 70.8 X1976 Georgia 33.02 Alaska 62.91 Virginia 50.73 New Mexico 51.04 Iowa 50.51 South Carolina 43.54 Clustering vector: Alabama Alaska Arizona Arkansas California 1 2 3 3 2 Colorado Connecticut Delaware Florida Georgia 2 2 4 3 1 Hawaii Idaho Illinois Indiana Iowa 2 4 5 4 5 Kansas Kentucky Louisiana Maine Maryland 5 4 1 2 4 Massachusetts Michigan Minnesota Mississippi Missouri 5 2 5 6 4 Montana Nebraska Nevada New Hampshire New Jersey 4 5 2 2 2 New Mexico New York North Carolina North Dakota Ohio 4 4 3 5 5 Oklahoma Oregon Pennsylvania Rhode Island South Carolina 4 4 5 5 6 South Dakota Tennessee Texas Utah Vermont 5 3 2 4 2 Virginia Washington West Virginia Wisconsin Wyoming 
3 4 4 5 4 Objective function: build swap 31.58067 31.52926 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" "data" > > cat('Time elapsed: ', proc.time() - .proctime00,'\n') Time elapsed: 1.817 0.028 1.938 0 0 > > ## re-starting with medoids from pv6 shouldn't change: > pv6. <- pam(votes.repub, 6, medoids = pv6$id.med, trace = 3) C pam(): computing 1226 dissimilarities from 50 x 31 matrix: [Ok] pam()'s bswap(*, s=281.951, pamonce=0): medoids given after build: medoids are 2 10 15 31 40 46 and min.dist dysma[1:n] are 48.2 0 37.7 56 35.8 28.5 28.6 31.7 54.1 0 51.7 33.2 27.3 30.5 0 35.1 25.4 58.3 36.9 26.7 48.4 28.1 33.2 63.1 21.1 28.6 37.5 35.8 29.8 31.3 0 32 29.9 35.7 30.9 35.1 27.8 35.7 50.2 0 26.2 30.2 45.2 34.1 33.8 0 28.5 35.1 34.2 28.8 end{bswap()}, end{cstat()} > identical(pv6[nm3], pv6.[nm3]) [1] TRUE > > ## This example seg.faulted at some point: > d.st <- data.frame(V1= c(9, 12, 12, 15, 9, 9, 13, 11, 15, 10, 13, 13, + 13, 15, 8, 13, 13, 10, 7, 9, 6, 11, 3), + V2= c(5, 9, 3, 5, 1, 1, 2, NA, 10, 1, 4, 7, + 4, NA, NA, 5, 2, 4, 3, 3, 6, 1, 1), + V3 = c(63, 41, 59, 50, 290, 226, 60, 36, 32, 121, 70, 51, + 79, 32, 42, 39, 76, 60, 56, 88, 57, 309, 254), + V4 = c(146, 43, 78, 88, 314, 149, 78, NA, 238, 153, 159, 222, + 203, NA, NA, 74, 100, 111, 9, 180, 50, 256, 107)) > dd <- daisy(d.st, stand = TRUE) > (r0 <- pam(dd, 5))# cluster 5 = { 23 } -- on single observation Medoids: ID [1,] 15 15 [2,] 8 8 [3,] 14 14 [4,] 22 22 [5,] 23 23 Clustering vector: [1] 1 2 2 3 4 4 2 2 3 2 2 2 2 3 1 2 2 2 1 1 1 4 5 Objective function: build swap 0.9368049 0.8621860 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" > ## pam doing the "daisy" computation internally: > r0s <- pam(d.st, 5, stand=TRUE, keep.diss=FALSE, keep.data=FALSE) > (ii <- which(names(r0) %in% c("call","medoids"))) [1] 1 9 > stopifnot(all.equal(r0[-ii], r0s[-ii], tol=1e-14), + 
identical(r0s$medoids, data.matrix(d.st)[r0$medoids, ])) > > ## This gave only 3 different medoids -> and seg.fault: > (r5 <- pam(dd, 5, medoids = c(1,3,20,2,5), trace = 2)) # now "fine" pam()'s bswap(*, s=8.51931, pamonce=0): medoids given after build: medoids are 1 2 3 5 20 swp new 14 <-> 2 old; decreasing diss. 29.8745 by -5.50096 swp new 15 <-> 1 old; decreasing diss. 24.3735 by -2.20162 swp new 6 <-> 20 old; decreasing diss. 22.1719 by -2.12745 swp new 8 <-> 3 old; decreasing diss. 20.0444 by -0.201608 end{bswap()}, end{cstat()} Medoids: ID [1,] 15 15 [2,] 8 8 [3,] 14 14 [4,] 5 5 [5,] 6 6 Clustering vector: [1] 1 2 2 3 4 5 2 2 3 5 2 2 2 3 1 2 2 2 1 1 1 4 5 Objective function: build swap 1.2988899 0.8627319 Available components: [1] "medoids" "id.med" "clustering" "objective" "isolation" [6] "clusinfo" "silinfo" "diss" "call" > > dev.off() null device 1 > > ##------------------------ Testing pam() with new "pamonce" argument: > > ## This is from "next version of Matrix" test-tools-1.R: > showSys.time <- function(expr) { + ## prepend 'Time' for R CMD Rdiff + st <- system.time(expr) + writeLines(paste("Time", capture.output(print(st)))) + invisible(st) + } > show2Ratios <- function(...) { + stopifnot(length(rgs <- list(...)) == 2, + nchar(ns <- names(rgs)) > 0) + r <- round(cbind(..1, ..2)[c(1,3),], 3) + dimnames(r) <- list(paste("Time ", rownames(r)), ns) + r + } > > > n <- 1000 > ## If not enough cases, all CPU times equals 0. > n <- 500 # for now, and automatic testing > > sd <- 0.5 > set.seed(13) > n2 <- as.integer(round(n * 1.5)) > x <- rbind(cbind(rnorm( n,0,sd), rnorm( n,0,sd)), + cbind(rnorm(n2,5,sd), rnorm(n2,5,sd)), + cbind(rnorm(n2,7,sd), rnorm(n2,7,sd)), + cbind(rnorm(n2,9,sd), rnorm(n2,9,sd))) > > > ## original algorithm > st0 <- showSys.time(pamx <- pam(x, 4, trace.lev=2))# 8.157 0.024 8.233 C pam(): computing 3779876 dissimilarities from 2750 x 2 matrix: [Ok] pam()'s bswap(*, s=15.7788, pamonce=0): build 4 medoids: new repr. 1268 new repr. 
414 new repr. 2153 new repr. 915 after build: medoids are 414 915 1268 2153 swp new 1793 <-> 1268 old; decreasing diss. 1862.37 by -129.13 end{bswap()}, end{cstat()} Time user system elapsed Time 1.832 0.032 1.893 > ## bswapPamOnce algorithm > st1 <- showSys.time(pamxonce <- pam(x, 4, pamonce=TRUE, trace.lev=2))# 6.122 0.024 6.181 C pam(): computing 3779876 dissimilarities from 2750 x 2 matrix: [Ok] pam()'s bswap(*, s=15.7788, pamonce=1): build 4 medoids: new repr. 1268 new repr. 414 new repr. 2153 new repr. 915 after build: medoids are 414 915 1268 2153 swp new 1793 <-> 1268 old; decreasing diss. 1862.37 by -129.13 end{bswap()}, end{cstat()} Time user system elapsed Time 1.607 0.024 1.639 > ## bswapPamOnceDistIndice > st2 <- showSys.time(pamxonce2 <- pam(x, 4, pamonce = 2, trace.lev=2))# 4.101 0.024 4.151 C pam(): computing 3779876 dissimilarities from 2750 x 2 matrix: [Ok] pam()'s bswap(*, s=15.7788, pamonce=2): build 4 medoids: new repr. 1268 new repr. 414 new repr. 2153 new repr. 915 after build: medoids are 414 915 1268 2153 swp new 1793 <-> 1268 old; decreasing diss. 1862.37 by -129.13 end{bswap()}, end{cstat()} Time user system elapsed Time 1.302 0.000 1.304 > show2Ratios('2:orig' = st2/st0, '1:orig' = st1/st0) 2:orig 1:orig Time user.self 0.711 0.877 Time elapsed 0.689 0.866 > > ## only call element is not equal > (icall <- which(names(pamx) == "call")) [1] 9 > pamx[[icall]] pam(x = x, k = 4, trace.lev = 2) > stopifnot(all.equal(pamx [-icall], pamxonce [-icall]), + all.equal(pamxonce[-icall], pamxonce2[-icall])) > > ## Same using specified medoids > (med0 <- 1 + round(n* c(0,1, 2.5, 4)))# lynne (~ 2010, AMD Phenom II X4 925) [1] 1 501 1251 2001 > st0 <- showSys.time(pamxst <- pam(x, 4, medoids = med0, trace.lev=2))# 13.071 0.024 13.177 C pam(): computing 3779876 dissimilarities from 2750 x 2 matrix: [Ok] pam()'s bswap(*, s=15.7788, pamonce=0): medoids given after build: medoids are 1 501 1251 2001 swp new 915 <-> 501 old; decreasing diss. 
2126.83 by -197.507 swp new 1793 <-> 1251 old; decreasing diss. 1929.32 by -101.336 swp new 414 <-> 1 old; decreasing diss. 1827.98 by -86.3404 swp new 2153 <-> 2001 old; decreasing diss. 1741.64 by -8.40201 end{bswap()}, end{cstat()} Time user system elapsed Time 2.574 0.004 2.579 > st1 <- showSys.time(pamxoncest <- pam(x, 4, medoids = med0, pamonce=TRUE, trace.lev=2))# 8.503 0.024 8.578 C pam(): computing 3779876 dissimilarities from 2750 x 2 matrix: [Ok] pam()'s bswap(*, s=15.7788, pamonce=1): medoids given after build: medoids are 1 501 1251 2001 swp new 915 <-> 501 old; decreasing diss. 2126.83 by -197.507 swp new 1793 <-> 1251 old; decreasing diss. 1929.32 by -101.336 swp new 414 <-> 1 old; decreasing diss. 1827.98 by -86.3404 swp new 2153 <-> 2001 old; decreasing diss. 1741.64 by -8.40201 end{bswap()}, end{cstat()} Time user system elapsed Time 2.121 0.000 2.129 > st2 <- showSys.time(pamxonce2st <- pam(x, 4, medoids = med0, pamonce=2, trace.lev=2))# 5.587 0.025 5.647 C pam(): computing 3779876 dissimilarities from 2750 x 2 matrix: [Ok] pam()'s bswap(*, s=15.7788, pamonce=2): medoids given after build: medoids are 1 501 1251 2001 swp new 915 <-> 501 old; decreasing diss. 2126.83 by -197.507 swp new 1793 <-> 1251 old; decreasing diss. 1929.32 by -101.336 swp new 414 <-> 1 old; decreasing diss. 1827.98 by -86.3404 swp new 2153 <-> 2001 old; decreasing diss. 
1741.64 by -8.40201 end{bswap()}, end{cstat()} Time user system elapsed Time 1.305 0.004 1.310 > show2Ratios('2:orig' = st2/st0, '1:orig' = st1/st0) 2:orig 1:orig Time user.self 0.507 0.824 Time elapsed 0.508 0.826 > > ## only call element is not equal > stopifnot(all.equal(pamxst [-icall], pamxoncest [-icall]), + all.equal(pamxoncest[-icall], pamxonce2st[-icall])) > > ## Different starting values > med0 <- 1:4 # lynne (~ 2010, AMD Phenom II X4 925) > st0 <- showSys.time(pamxst <- pam(x, 4, medoids = med0, trace.lev=2))# 13.416 0.023 13.529 C pam(): computing 3779876 dissimilarities from 2750 x 2 matrix: [Ok] pam()'s bswap(*, s=15.7788, pamonce=0): medoids given after build: medoids are 1 2 3 4 swp new 1421 <-> 4 old; decreasing diss. 21009.4 by -15939.9 swp new 2153 <-> 3 old; decreasing diss. 5069.52 by -1657.88 swp new 915 <-> 2 old; decreasing diss. 3411.65 by -1592.06 swp new 414 <-> 1 old; decreasing diss. 1819.59 by -86.3404 swp new 1793 <-> 1421 old; decreasing diss. 1733.25 by -0.00639415 end{bswap()}, end{cstat()} Time user system elapsed Time 3.087 0.000 3.087 > st1 <- showSys.time(pamxoncest <- pam(x, 4, medoids = med0, pamonce=TRUE, trace.lev=2))# 8.384 0.024 8.459 C pam(): computing 3779876 dissimilarities from 2750 x 2 matrix: [Ok] pam()'s bswap(*, s=15.7788, pamonce=1): medoids given after build: medoids are 1 2 3 4 swp new 1421 <-> 4 old; decreasing diss. 21009.4 by -15939.9 swp new 2153 <-> 3 old; decreasing diss. 5069.52 by -1657.88 swp new 915 <-> 2 old; decreasing diss. 3411.65 by -1592.06 swp new 414 <-> 1 old; decreasing diss. 1819.59 by -86.3404 swp new 1793 <-> 1421 old; decreasing diss. 
1733.25 by -0.00639415 end{bswap()}, end{cstat()} Time user system elapsed Time 2.487 0.000 2.488 > st2 <- showSys.time(pamxonce2st <- pam(x, 4, medoids = med0, pamonce=2, trace.lev=2))# 5.455 0.030 5.520 C pam(): computing 3779876 dissimilarities from 2750 x 2 matrix: [Ok] pam()'s bswap(*, s=15.7788, pamonce=2): medoids given after build: medoids are 1 2 3 4 swp new 1421 <-> 4 old; decreasing diss. 21009.4 by -15939.9 swp new 2153 <-> 3 old; decreasing diss. 5069.52 by -1657.88 swp new 915 <-> 2 old; decreasing diss. 3411.65 by -1592.06 swp new 414 <-> 1 old; decreasing diss. 1819.59 by -86.3404 swp new 1793 <-> 1421 old; decreasing diss. 1733.25 by -0.00639415 end{bswap()}, end{cstat()} Time user system elapsed Time 1.566 0.000 1.568 > show2Ratios('2:orig' = st2/st0, '1:orig' = st1/st0) 2:orig 1:orig Time user.self 0.507 0.806 Time elapsed 0.508 0.806 > > ## only call element is not equal > stopifnot(all.equal(pamxst [-icall], pamxoncest [-icall]), + all.equal(pamxoncest[-icall], pamxonce2st[-icall])) > > > ## Medoid bug --- MM: Fixed, well "0L+ hack", in my pam.q, on 2012-01-31 > ## ---------- > med0 <- (1:6) > st0 <- showSys.time(pamxst <- pam(x, 6, medoids = med0 , trace.lev=2)) C pam(): computing 3779876 dissimilarities from 2750 x 2 matrix: [Ok] pam()'s bswap(*, s=15.7788, pamonce=0): medoids given after build: medoids are 1 2 3 4 5 6 swp new 1421 <-> 6 old; decreasing diss. 20991.1 by -15949.5 swp new 2153 <-> 4 old; decreasing diss. 5041.66 by -1676.25 swp new 915 <-> 3 old; decreasing diss. 3365.41 by -1671.37 swp new 325 <-> 2 old; decreasing diss. 1694.04 by -53.8582 swp new 2720 <-> 5 old; decreasing diss. 1640.18 by -26.6572 swp new 2696 <-> 2153 old; decreasing diss. 1613.53 by -19.0531 swp new 52 <-> 1 old; decreasing diss. 1594.47 by -13.965 swp new 2709 <-> 2720 old; decreasing diss. 1580.51 by -5.81848 swp new 199 <-> 325 old; decreasing diss. 1574.69 by -2.65496 swp new 438 <-> 52 old; decreasing diss. 
1572.03 by -1.77054 swp new 2082 <-> 2696 old; decreasing diss. 1570.26 by -0.187256 end{bswap()}, end{cstat()} Time user system elapsed Time 9.981 0.004 10.018 > stopifnot(identical(med0, 1:6)) > med0 <- (1:6) > st1 <- showSys.time(pamxst.1 <- pam(x, 6, medoids = med0 , pamonce=1, trace.lev=2)) C pam(): computing 3779876 dissimilarities from 2750 x 2 matrix: [Ok] pam()'s bswap(*, s=15.7788, pamonce=1): medoids given after build: medoids are 1 2 3 4 5 6 swp new 1421 <-> 6 old; decreasing diss. 20991.1 by -15949.5 swp new 2153 <-> 4 old; decreasing diss. 5041.66 by -1676.25 swp new 915 <-> 3 old; decreasing diss. 3365.41 by -1671.37 swp new 325 <-> 2 old; decreasing diss. 1694.04 by -53.8582 swp new 2720 <-> 5 old; decreasing diss. 1640.18 by -26.6572 swp new 2696 <-> 2153 old; decreasing diss. 1613.53 by -19.0531 swp new 52 <-> 1 old; decreasing diss. 1594.47 by -13.965 swp new 2709 <-> 2720 old; decreasing diss. 1580.51 by -5.81848 swp new 199 <-> 325 old; decreasing diss. 1574.69 by -2.65496 swp new 438 <-> 52 old; decreasing diss. 1572.03 by -1.77054 swp new 2082 <-> 2696 old; decreasing diss. 1570.26 by -0.187256 end{bswap()}, end{cstat()} Time user system elapsed Time 6.703 0.000 6.713 > stopifnot(identical(med0, 1:6)) > stopifnot(all.equal(pamxst[-icall], pamxst.1 [-icall])) > > > ## Last Line: > cat('Time elapsed: ', proc.time() - .proctime00,'\n') Time elapsed: 36.876 0.112 37.177 0 0 > > > proc.time() user system elapsed 37.116 0.156 37.452 cluster/tests/clusplot-out.R0000644000175100001440000000152410761764276015732 0ustar hornikuserslibrary(cluster) ### clusplot() & pam() RESULT checking ... 
## plotting votes.diss(dissimilarity) in a bivariate plot and ## partitioning into 2 clusters data(votes.repub) votes.diss <- daisy(votes.repub) for(k in 2:4) { votes.clus <- pam(votes.diss, k, diss = TRUE)$clustering print(clusplot(votes.diss, votes.clus, diss = TRUE, shade = TRUE)) } ## plotting iris (dataframe) in a 2-dimensional plot and partitioning ## into 3 clusters. data(iris) iris.x <- iris[, 1:4] for(k in 2:5) print(clusplot(iris.x, pam(iris.x, k)$clustering, diss = FALSE)) .Random.seed <- c(0L,rep(7654L,3)) ## generate 25 objects, divided into 2 clusters. x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)), cbind(rnorm(15,5,0.5), rnorm(15,5,0.5))) print.default(clusplot(px2 <- pam(x, 2))) clusplot(px2, labels = 2, col.p = 1 + px2$clustering) cluster/tests/clara-NAs.Rout.save0000644000175100001440000007141012461440417016472 0ustar hornikusers R Under development (unstable) (2015-01-21 r67569) -- "Unsuffered Consequences" Copyright (C) 2015 The R Foundation for Statistical Computing Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. 
> library(cluster) > > x <- cbind(c(0, -4, -22, -14, 0, NA, -28, 1, 10, -1, + 100 + c(13, 0, 2, 4, 7, 8, 1)), + c(-5, -14, NA, -35, -30, NA, 7, 2, -18, 13, + 47, 64, 48, NA, NA, 44, 65)) > x [,1] [,2] [1,] 0 -5 [2,] -4 -14 [3,] -22 NA [4,] -14 -35 [5,] 0 -30 [6,] NA NA [7,] -28 7 [8,] 1 2 [9,] 10 -18 [10,] -1 13 [11,] 113 47 [12,] 100 64 [13,] 102 48 [14,] 104 NA [15,] 107 NA [16,] 108 44 [17,] 101 65 > (d <- dist(x,'manhattan')) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 2 13 3 44 36 4 44 31 16 5 25 20 44 19 6 NA NA NA NA NA 7 40 45 12 56 65 NA 8 8 21 46 52 33 NA 34 9 23 18 64 41 22 NA 63 29 10 19 30 42 61 44 NA 33 13 42 11 165 178 270 209 190 NA 181 157 168 148 12 169 182 244 213 194 NA 185 161 172 152 30 13 155 168 248 199 180 NA 171 147 158 138 12 18 14 208 216 252 236 208 NA 264 206 188 210 18 8 4 15 214 222 258 242 214 NA 270 212 194 216 12 14 10 6 16 157 170 260 201 182 NA 173 149 160 140 8 28 10 8 2 17 171 184 246 215 196 NA 187 163 174 154 30 2 18 6 12 28 > summary(d, na.rm = TRUE) # max = 270 Min. 1st Qu. Median Mean 3rd Qu. Max. NA's 2.00 27.25 147.50 114.60 188.50 270.00 16 > ## First call with "trace" (seg.fault typically later ...): > try( clara(x, k=2, metric="manhattan", sampsize=10, trace = 3) ) C clara(): (nsam,nran,n) = (10,5,17); 'large_sample', C clara(): sample 1 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 1 {295} [ntt=7, nunfs=0] .. nsel[1:10]= 6 7 8 9 10 12 13 14 16 17 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 2 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 3 {295} [ntt=7, nunfs=1] .. nsel[1:10]= 1 4 7 9 11 12 13 15 16 17 -> dysta2() . clara(): s:= max dys[1..45] = 270; clara()'s bswap2(*, s=270): new repr. 7 new repr. 1 after build: medoids are 1 7 and min.dist dysma[1:n] are 0 44 40 23 12 18 0 10 10 18 --> sky = sum_j D_j= 175 swp new 8 <-> 7 old; decreasing diss. by -18 Last swap: new 8 <-> 7 old; decreasing diss. 
by 1 end{bswap2}: sky = 157 selec() -> 'NAfs' obj= 5.47059 C clara(): sample 3 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 6 {295} [ntt=7, nunfs=2] .. nsel[1:10]= 1 2 3 5 8 9 10 11 13 17 -> dysta2() . clara(): s:= max dys[1..45] = 270; clara()'s bswap2(*, s=270): new repr. 5 new repr. 9 after build: medoids are 5 9 and min.dist dysma[1:n] are 8 21 46 33 0 29 13 12 0 18 --> sky = sum_j D_j= 180 swp new 1 <-> 5 old; decreasing diss. by -18 Last swap: new 1 <-> 5 old; decreasing diss. by 1 end{bswap2}: sky = 162 selec() -> 'NAfs' obj= 5.47059 C clara(): sample 4 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 5 {295} [ntt=7, nunfs=3] .. nsel[1:10]= 1 2 3 4 7 8 9 10 13 14 -> dysta2() . clara(): s:= max dys[1..45] = 264; clara()'s bswap2(*, s=264): new repr. 1 new repr. 10 after build: medoids are 1 10 and min.dist dysma[1:n] are 0 13 44 44 40 8 23 19 4 0 --> sky = sum_j D_j= 195 Last swap: new 9 <-> 10 old; decreasing diss. by 0 end{bswap2}: sky = 195 selec() -> 'NAfs' obj= 5.47059 C clara(): sample 5 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 16 {295} [ntt=7, nunfs=4] .. nsel[1:10]= 2 3 4 6 7 8 9 10 11 17 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample Error in clara(x, k = 2, metric = "manhattan", sampsize = 10, trace = 3) : Observation 6 has *only* NAs --> omit it for clustering > ## Originally:already shows the problem: nbest[] = c(0,0,...,0) must be WRONG!! > ## Now: gives the proper error message. > > ## S-plus 6.1.2 (rel.2 for Linux, 2002) gives > ##> cc <- clara(x, k=2, metric="manhattan", samples=2, sampsize=10) > ## Problem in .Fortran("clara",: Internal error: data for decrementing > ## ref.count didn't point to a valid arena (0x0), while calling subroutine clara > > ## The large example from clara.R -- made small enough to still provoke > ## the "** dysta2() ... 
OUT" problem {no longer!} > x <- matrix(c(0, 3, -4, 62, 1, 3, -7, 45, 36, 46, 45, 54, -10, + 51, 49, -5, 13, -6, 49, 52, 57, 39, -1, 55, 68, -3, 51, 11, NA, + 9, -3, 50, NA, 58, 9, 52, 12, NA, 47, -12, -6, -9, 5, 30, 38, + 54, -5, 39, 50, 50, 54, 43, 7, 64, 55, 4, 0, 72, 54, 37, 59, + -1, 8, 43, 50, -2, 56, -8, 43, 6, 4, 48, -2, 14, 45, 49, 56, + 51, 45, 11, 10, 42, 50, 2, -12, 3, 1, 2, 2, -14, -4, 8, 0, 3, + -11, 8, 5, 14, -1, 9, 0, 19, 10, -2, -9, 9, 2, 16, 10, 4, 1, + 12, 7, -4, 27, -8, -9, -9, 2, 8, NA, 13, -23, -3, -5, 1, 15, + -3, 5, -9, -5, 14, 8, 7, -4, 26, 20, 10, 8, 17, 4, 14, 23, -2, + 23, 2, 16, 5, 5, -3, 12, 5, 14, -2, 4, 2, -2, 7, 9, 1, -15, -1, + 9, 23, 1, 7, 13, 2, -11, 16, 12, -11, -14, 2, 6, -8), + ncol = 2) > str(x) # 88 x 2 num [1:88, 1:2] 0 3 -4 62 1 3 -7 45 36 46 ... > try(clara(x, 2, samples = 20, trace = 3))# 2nd sample did show dysta2() problem C clara(): (nsam,nran,n) = (44,20,88); C clara(): sample 1 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 2 {295} [ntt=44, nunfs=0] .. nsel[1:44]= 2 3 4 6 9 10 12 14 15 18 19 20 24 25 26 28 31 35 38 42 47 48 51 53 54 57 60 61 64 66 68 70 71 73 74 75 76 77 78 79 80 81 82 88 -> dysta2() . clara(): s:= max dys[1..946] = 78.6448; clara()'s bswap2(*, s=78.6448): new repr. 19 new repr. 9 after build: medoids are 9 19 and min.dist dysma[1:n] are 21.2 7.07 9.9 2.83 5.66 5 5.1 9.22 0 11.3 1.41 6.71 6.32 8.49 7.07 12.7 1.41 33.9 0 14.1 7.07 18.9 5.39 4.24 15.5 31.1 5.66 5.66 5.66 4.24 1.41 8.49 11.3 22.6 2.83 4.12 13 0 3.61 5 1.41 17 9.22 12.7 --> sky = sum_j D_j= 385.677 Last swap: new 6 <-> 9 old; decreasing diss. by 0.939294 end{bswap2}: sky = 385.677 selec() -> 'NAfs' obj= 1.64279 C clara(): sample 2 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 12 {295} [ntt=44, nunfs=1] .. nsel[1:44]= 1 2 3 5 6 7 9 12 17 19 26 27 28 29 30 38 39 42 43 45 47 50 52 54 55 56 58 59 60 61 62 64 67 68 71 74 75 76 77 79 80 81 83 84 -> dysta2() . 
clara(): s:= max dys[1..946] = 81.7435; clara()'s bswap2(*, s=81.7435): new repr. 16 new repr. 43 after build: medoids are 16 43 and min.dist dysma[1:n] are 1.41 21.2 7.07 1.41 2.83 17 5.66 5 14.1 1.41 7.07 15 12.7 14.1 14.1 0 4.24 14.1 8.49 9.9 7.07 2 8.6 14.1 12.1 4.24 1.41 5.66 5.66 5.66 5.66 5.66 4.24 1.41 11.3 2.83 5.83 11 0 5.1 1.41 17 0 17 --> sky = sum_j D_j= 331.98 Last swap: new 17 <-> 43 old; decreasing diss. by 0.492109 end{bswap2}: sky = 331.98 selec() -> 'NAfs' obj= 1.59799 C clara(): sample 3 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 14 {295} [ntt=44, nunfs=2] .. nsel[1:44]= 1 3 4 6 7 9 12 14 15 16 18 19 20 29 30 31 32 36 38 39 40 44 46 47 48 49 51 53 54 56 57 60 62 64 65 66 67 73 75 77 79 81 82 87 -> dysta2() . clara(): s:= max dys[1..946] = 77.8781; clara()'s bswap2(*, s=77.8781): new repr. 19 new repr. 35 after build: medoids are 19 35 and min.dist dysma[1:n] are 1.41 7.07 9.9 2.83 17 5.66 6.4 5.1 4.12 4.24 11.3 1.41 2.83 14.1 14.1 1.41 6 5.66 0 3.16 5.66 18.4 8.06 7.07 16.3 6 7.21 4.24 14 4.24 31.1 5.66 5.66 5.66 0 4.24 4.24 22.6 7.07 0 5.1 17 8.25 7.07 --> sky = sum_j D_j= 338.587 end{bswap2}: sky = 338.587 selec() -> 'NAfs' obj= 1.57294 C clara(): sample 4 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 70 {295} [ntt=44, nunfs=3] .. nsel[1:44]= 1 3 8 9 14 15 16 17 19 20 22 23 28 30 31 32 34 35 36 37 38 39 40 41 45 46 47 49 54 56 57 65 66 67 69 70 71 74 76 77 78 84 86 88 -> dysta2() . clara(): s:= max dys[1..946] = 77.8781; clara()'s bswap2(*, s=77.8781): new repr. 21 new repr. 32 after build: medoids are 21 32 and min.dist dysma[1:n] are 1.41 7.07 7.81 5.66 5.1 4.12 4.24 14.1 1.41 2.83 4.24 0 12.7 14.1 1.41 6 8.06 33.9 5.66 8.49 0 3.16 5.66 5.66 9.9 8.06 7.07 6 14 4.24 31.1 0 4.24 4.24 4.24 8.49 11.3 2.83 9.06 0 7.07 17 1.41 12.7 --> sky = sum_j D_j= 325.933 end{bswap2}: sky = 325.933 selec() -> 'NAfs' obj= 1.57294 C clara(): sample 5 finding 1st... new k{ran}: .. kall: FALSE, ... 
nrx [0:1]= 0 0 ... nsel[0:0]= 80 {295} [ntt=44, nunfs=4] .. nsel[1:44]= 1 2 3 5 7 8 11 13 14 20 22 23 26 28 30 31 33 34 37 38 39 41 45 46 47 50 51 52 57 59 61 64 67 71 76 77 79 80 81 82 85 86 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 6 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 5 {295} [ntt=44, nunfs=5] .. nsel[1:44]= 2 3 4 5 6 8 10 12 19 20 21 23 24 25 29 30 31 32 33 37 39 41 42 45 46 48 50 53 54 59 61 66 68 69 71 72 73 79 80 82 84 85 86 87 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 7 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 17 {295} [ntt=44, nunfs=6] .. nsel[1:44]= 2 3 6 7 8 12 14 16 17 18 20 22 26 27 29 30 31 32 33 35 36 37 42 44 45 46 49 52 54 58 59 61 62 63 65 67 70 74 75 77 78 79 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 8 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 67 {295} [ntt=44, nunfs=7] .. nsel[1:44]= 2 3 6 7 8 9 11 13 14 18 30 31 32 34 35 37 38 40 43 44 47 48 49 52 54 55 56 58 59 60 66 67 68 70 71 72 73 75 80 83 84 85 87 88 -> dysta2() . clara(): s:= max dys[1..946] = 85.5102; clara()'s bswap2(*, s=85.5102): new repr. 17 new repr. 9 after build: medoids are 9 17 and min.dist dysma[1:n] are 21.2 7.07 2.83 17 9.9 5.66 2.83 1.41 0 11.3 14.1 1.41 9.9 9.22 33.9 8.49 0 5.66 8.49 18.4 7.07 13.9 1.41 8.25 13.9 5.66 4.24 1.41 4.24 5.66 4.24 4.24 1.41 8.49 11.3 0 22.6 11.3 1.41 7.07 17 21.2 7.07 12.7 --> sky = sum_j D_j= 384.698 end{bswap2}: sky = 384.698 selec() -> 'NAfs' obj= 1.54909 C clara(): sample 9 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 67 {295} [ntt=44, nunfs=8] .. nsel[1:44]= 2 4 6 7 8 11 12 13 14 15 17 19 20 21 24 27 29 30 31 33 34 35 36 39 42 45 46 48 50 51 52 54 55 58 60 61 62 65 67 78 80 84 86 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 10 finding 1st... new k{ran}: .. 
kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 5 {295} [ntt=44, nunfs=9] .. nsel[1:44]= 1 3 5 6 7 9 10 14 15 16 17 18 19 20 21 23 28 29 30 32 33 36 37 39 40 44 46 47 51 53 54 55 56 57 65 69 70 74 76 81 82 84 86 87 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 11 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 66 {295} [ntt=44, nunfs=10] .. nsel[1:44]= 1 3 4 5 6 11 13 14 15 18 19 21 28 30 31 32 33 34 39 40 41 42 43 46 47 57 58 59 63 65 66 67 71 72 73 74 75 78 79 80 83 84 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 12 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 21 {295} [ntt=44, nunfs=11] .. nsel[1:44]= 4 5 6 8 9 10 13 14 15 16 17 21 23 25 27 28 30 35 36 41 44 46 47 49 50 54 55 56 57 59 61 62 64 65 66 68 71 72 74 75 76 81 83 84 -> dysta2() . clara(): s:= max dys[1..946] = 78.3135; clara()'s bswap2(*, s=78.3135): new repr. 5 new repr. 2 after build: medoids are 2 5 and min.dist dysma[1:n] are 26.2 0 3.61 9.49 0 13.5 11 20.5 13.9 6.32 15 21.6 2.24 32.1 26.6 12.8 12 24.4 17.9 8.6 10.8 18.1 7.21 20.5 14.9 29.4 26.2 3.61 23 21.1 23 3.61 7 16.6 3.61 9.22 9.49 12.6 13 9.85 22.2 14.2 15.7 11 --> sky = sum_j D_j= 623.732 swp new 43 <-> 5 old; decreasing diss. by -205.358 Last swap: new 43 <-> 5 old; decreasing diss. by 1 end{bswap2}: sky = 418.375 selec() -> 'NAfs' obj= 3.10025 C clara(): sample 13 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 4 {295} [ntt=44, nunfs=12] .. nsel[1:44]= 3 4 5 14 15 16 17 19 20 21 24 25 26 29 30 31 34 35 38 40 41 43 47 49 50 52 55 57 58 60 61 63 64 65 66 68 72 73 74 79 81 83 86 88 -> dysta2() . clara(): s:= max dys[1..946] = 84.1487; clara()'s bswap2(*, s=84.1487): new repr. 19 new repr. 
34 after build: medoids are 19 34 and min.dist dysma[1:n] are 7.07 9.9 1.41 5.1 4.12 4.24 14.1 1.41 2.83 8.06 5.39 8.49 7.07 14.1 14.1 1.41 8.06 33.9 0 5.66 5.66 8.49 7.07 6 4 7.62 10.3 31.1 1.41 5.66 5.66 15.6 5.66 0 4.24 1.41 0 22.6 2.83 5.1 17 2 1.41 12.7 --> sky = sum_j D_j= 340.1 end{bswap2}: sky = 340.1 selec() -> 'NAfs' obj= 1.57294 C clara(): sample 14 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 78 {295} [ntt=44, nunfs=13] .. nsel[1:44]= 5 7 8 9 10 11 12 18 19 21 23 27 28 29 31 32 33 35 36 38 39 46 50 51 52 56 57 58 59 60 64 65 66 68 72 73 75 77 78 80 84 86 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 15 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 73 {295} [ntt=44, nunfs=14] .. nsel[1:44]= 2 3 8 10 18 25 26 27 29 31 33 34 35 41 42 43 44 46 47 48 49 53 54 56 57 58 59 60 63 69 70 71 72 73 75 76 77 79 81 84 85 86 87 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 16 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 43 {295} [ntt=44, nunfs=15] .. nsel[1:44]= 5 6 7 8 9 12 14 16 18 22 23 24 27 28 29 30 34 35 36 39 40 41 43 45 56 57 59 60 62 64 65 67 69 70 71 73 74 75 79 81 83 85 86 87 -> dysta2() . clara(): s:= max dys[1..946] = 75.1665; clara()'s bswap2(*, s=75.1665): new repr. 15 new repr. 41 after build: medoids are 15 41 and min.dist dysma[1:n] are 12.7 17 2.83 6.4 15.7 5 7.07 9.9 25.5 13.6 14.1 5 15 1.41 0 0 8.06 19.8 8.49 4.24 8.49 19.8 5.66 12.6 9.9 45.3 5.66 14.8 8.49 9.9 2 10 9.9 22.6 25.5 8.49 11.3 5.83 5.1 2.83 0 7.07 15.6 21.2 --> sky = sum_j D_j= 479.721 end{bswap2}: sky = 479.721 selec() -> 'NAfs' obj= 2.17131 C clara(): sample 17 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 22 {295} [ntt=44, nunfs=16] .. 
nsel[1:44]= 4 6 9 10 11 12 13 16 19 22 26 27 29 30 33 34 37 38 39 42 43 48 51 54 55 57 60 61 62 63 64 66 69 72 73 75 76 77 78 81 82 85 86 87 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 18 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 39 {295} [ntt=44, nunfs=17] .. nsel[1:44]= 1 4 8 13 15 19 20 23 25 26 27 28 30 31 33 34 36 37 39 41 42 43 44 45 46 47 50 54 55 57 59 60 62 64 65 66 67 72 73 78 79 81 82 85 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 19 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 72 {295} [ntt=44, nunfs=18] .. nsel[1:44]= 1 4 5 6 10 12 13 14 17 18 19 22 23 25 27 30 31 32 33 38 39 40 41 42 44 45 46 48 55 57 58 59 60 61 66 67 69 70 72 74 83 84 85 86 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample C clara(): sample 20 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 48 {295} [ntt=44, nunfs=19] .. nsel[1:44]= 1 3 4 5 7 10 11 13 14 20 22 23 31 32 33 34 35 36 37 40 41 42 43 44 48 50 52 53 55 56 62 63 68 71 72 73 74 75 81 82 83 84 86 88 -> dysta2() dysta2() gave dyst_toomany_NA --> new sample Error in clara(x, 2, samples = 20, trace = 3) : Observation 33 has *only* NAs --> omit it for clustering > ## To see error message for > 1 missing: > try(clara(rbind(NA,x), 2)) Error in clara(rbind(NA, x), 2) : Observations 1,34 have *only* NAs --> omit them for clustering! > > x <- x[-33,] > ## still had the ** dysta2() .. OUT" problem {no longer!} > clara(x, 2, samples = 12, trace = 3) C clara(): (nsam,nran,n) = (44,12,87); 'large_sample', C clara(): sample 1 finding 1st... new k{ran}: .. kall: FALSE, ... nrx [0:1]= 0 0 ... nsel[0:0]= 2 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 1 7 8 11 14 16 17 21 22 26 29 30 32 33 34 36 37 39 40 41 43 44 45 46 48 49 51 52 54 55 56 58 62 64 66 68 69 71 74 82 83 84 85 86 -> dysta2() . clara(): s:= max dys[1..946] = 76.5376; clara()'s bswap2(*, s=76.5376): new repr. 17 new repr. 
40 after build: medoids are 17 40 and min.dist dysma[1:n] are 1.41 17 6.4 2.83 7.07 4.24 14.1 7.28 4.24 7.07 14.1 14.1 4 8.06 33.9 8.49 0 5.66 5.66 14.1 18.4 9.9 6.4 7.07 8 2 8.6 4.24 12.1 4.24 31.1 5.66 15.6 2 4.24 4.24 8.49 0 5.83 0 17 21.2 1.41 7.07 --> sky = sum_j D_j= 384.62 end{bswap2}: sky = 384.62 1st proper or new best: obj= 4.93331 C clara(): sample 2 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 82 ... nsel[0:0]= 70 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 4 8 10 11 13 15 16 18 20 21 22 23 24 25 31 32 33 34 35 36 37 40 41 43 45 46 48 50 52 55 62 64 65 68 69 71 72 77 81 82 84 85 86 87 -> dysta2() . clara(): s:= max dys[1..946] = 82.7103; clara()'s bswap2(*, s=82.7103): new repr. 21 new repr. 40 after build: medoids are 21 40 and min.dist dysma[1:n] are 9.9 6.4 4.47 2.83 1.41 2.24 4.24 11.3 4.47 7.28 4.24 0 5 8.49 1.41 4 8.06 33.9 5.66 8.49 0 5.66 14.1 18.4 6.4 7.07 8 5.66 4.24 4.24 15.6 2 4.24 4.24 8.49 0 22.6 5.1 8.94 0 21.2 1.41 7.07 12.7 --> sky = sum_j D_j= 321.274 end{bswap2}: sky = 321.274 obj= 4.93331 C clara(): sample 3 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 82 ... nsel[0:0]= 38 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 2 5 10 11 14 18 21 22 23 24 25 26 27 29 33 34 36 37 39 41 42 43 45 47 49 50 55 57 58 60 62 64 67 68 71 73 75 77 79 82 83 84 85 87 -> dysta2() . clara(): s:= max dys[1..946] = 85.5102; clara()'s bswap2(*, s=85.5102): new repr. 18 new repr. 32 after build: medoids are 18 32 and min.dist dysma[1:n] are 21.2 1.41 4 2.83 5.1 11.3 8.06 4.24 0 5.39 8.49 7.07 13 14.1 8.06 33.9 8.49 0 5.66 14.1 8.49 18.4 8.06 16.3 4 7.21 4.24 1.41 4.47 5.66 15.6 0 1.41 4.24 0 2.83 9.06 7.07 1.41 2 17 21.2 1.41 12.7 --> sky = sum_j D_j= 350.699 Last swap: new 40 <-> 32 old; decreasing diss. by 0.7867 end{bswap2}: sky = 350.699 1st proper or new best: obj= 4.80308 C clara(): sample 4 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 85 {295} [ntt=43, nunfs=0] .. 
nsel[1:44]= 2 4 5 6 8 10 11 12 15 18 21 24 25 26 28 31 33 37 40 42 43 47 49 50 52 54 55 57 58 59 60 61 62 63 64 71 72 73 74 78 79 81 82 86 -> dysta2() . clara(): s:= max dys[1..946] = 78.3135; clara()'s bswap2(*, s=78.3135): new repr. 18 new repr. 43 after build: medoids are 18 43 and min.dist dysma[1:n] are 21.2 9.9 1.41 2.83 6.4 4.47 2.83 5 2.24 11.3 7.28 5 8.49 7.07 12.7 1.41 8.06 0 5.66 8.49 18.4 17.8 2 5.66 4.24 12.1 4.24 1.41 5.66 5.66 5.66 5.66 15.6 5.66 2 0 22.6 2.83 5.83 5.1 1.41 8.94 0 7.07 --> sky = sum_j D_j= 297.276 end{bswap2}: sky = 297.276 obj= 4.93331 C clara(): sample 5 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 84 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 4 6 9 10 12 13 15 16 17 18 20 21 24 26 27 31 34 35 36 37 40 41 42 43 45 51 52 53 54 55 56 62 64 65 67 68 69 71 72 73 74 77 79 82 -> dysta2() . clara(): s:= max dys[1..946] = 76.5376; clara()'s bswap2(*, s=76.5376): new repr. 20 new repr. 44 after build: medoids are 20 44 and min.dist dysma[1:n] are 9.9 2.83 5.66 4.47 5 1.41 2.24 4.24 14.1 11.3 4.47 7.28 5 7.07 15 1.41 33.9 5.66 8.49 0 5.66 14.1 8.49 18.4 6.4 8.6 4.24 14.1 12.1 4.24 31.1 15.6 2 4.24 1.41 4.24 8.49 0 22.6 2.83 5.83 5.1 1.41 0 --> sky = sum_j D_j= 350.799 end{bswap2}: sky = 350.799 obj= 4.93331 C clara(): sample 6 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 64 ... nsel[0:0]= 33 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 1 5 7 9 11 13 14 15 17 18 22 26 27 28 34 35 36 37 38 43 46 47 48 50 51 54 55 56 57 61 63 64 66 69 71 73 74 75 76 77 78 80 81 82 -> dysta2() . clara(): s:= max dys[1..946] = 82.0244; clara()'s bswap2(*, s=82.0244): new repr. 18 new repr. 
19 after build: medoids are 18 19 and min.dist dysma[1:n] are 1.41 1.41 17 5.66 2.83 1.41 5.66 5.39 14.1 11.3 4.24 7.07 12.6 12.7 33.9 5.66 8.49 0 0 17 7.07 13.6 5.83 9.9 4.47 11.3 4.24 31.1 1.41 5.66 5.66 3.16 4.24 8.49 0 2.83 6.32 8.25 0 8.49 2.83 17 5.1 4.24 --> sky = sum_j D_j= 339.187 end{bswap2}: sky = 339.187 1st proper or new best: obj= 4.79624 C clara(): sample 7 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 26 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 1 3 4 5 9 10 11 18 19 21 23 24 25 30 37 38 39 40 46 47 50 54 55 56 59 62 65 66 67 68 70 71 72 74 75 76 79 80 81 82 83 84 85 86 -> dysta2() . clara(): s:= max dys[1..946] = 82.7103; clara()'s bswap2(*, s=82.7103): new repr. 15 new repr. 16 after build: medoids are 15 16 and min.dist dysma[1:n] are 1.41 7.07 9.9 1.41 5.66 1.41 2.83 11.3 1.41 11.2 0 8.54 8.49 14.1 0 0 5.66 5.66 7.07 13.6 9.9 11.3 4.24 31.1 5.66 15.6 4.24 4.24 1.41 4.24 11.3 0 22.6 6.32 8.25 0 1.41 17 5.1 4.24 17 21.2 1.41 7.07 --> sky = sum_j D_j= 331.596 end{bswap2}: sky = 331.596 obj= 4.79624 C clara(): sample 8 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 74 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 4 5 10 12 15 16 17 19 20 21 22 23 24 25 26 27 28 29 31 37 38 39 41 42 44 45 46 49 50 51 56 57 61 62 63 64 68 73 75 77 78 80 81 85 -> dysta2() . clara(): s:= max dys[1..946] = 84.1487; clara()'s bswap2(*, s=84.1487): new repr. 20 new repr. 36 after build: medoids are 20 36 and min.dist dysma[1:n] are 9.9 1.41 4 6.4 4.12 4.24 14.1 1.41 2.83 8.06 4.24 0 5.39 8.49 7.07 13 12.7 14.1 1.41 0 3.16 5.66 14.1 8.49 9.9 8.06 7.07 4 7.21 7.62 31.1 1.41 5.66 15.6 5.66 0 4.24 2.83 9.06 7.07 5.1 17 8.25 1.41 --> sky = sum_j D_j= 312.667 end{bswap2}: sky = 312.667 obj= 4.80308 C clara(): sample 9 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 67 {295} [ntt=43, nunfs=0] .. 
nsel[1:44]= 1 3 5 9 11 13 17 22 23 25 29 31 32 34 37 38 40 42 43 44 47 49 53 55 56 58 62 63 66 68 69 70 71 72 73 74 75 76 78 81 82 84 85 86 -> dysta2() . clara(): s:= max dys[1..946] = 82.7103; clara()'s bswap2(*, s=82.7103): new repr. 15 new repr. 16 after build: medoids are 15 16 and min.dist dysma[1:n] are 1.41 7.07 1.41 5.66 2.83 1.41 14.1 4.24 0 8.49 14.1 1.41 7.62 33.9 0 0 5.66 8.49 17 9.9 13.6 5.83 17 4.24 31.1 7.07 15.6 5.66 4.24 4.24 8.49 11.3 0 22.6 2.83 6.32 8.25 0 2.83 5.1 4.24 21.2 1.41 7.07 --> sky = sum_j D_j= 355.134 end{bswap2}: sky = 355.134 obj= 4.79624 C clara(): sample 10 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 85 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 2 4 8 11 13 22 24 25 26 27 29 31 34 36 37 38 39 41 42 44 47 48 49 50 53 57 58 59 60 61 62 63 64 66 67 70 71 72 75 77 79 82 84 87 -> dysta2() . clara(): s:= max dys[1..946] = 85.5102; clara()'s bswap2(*, s=85.5102): new repr. 15 new repr. 33 after build: medoids are 15 33 and min.dist dysma[1:n] are 21.2 9.9 7.81 2.83 1.41 4.24 5.39 8.49 7.07 13 14.1 1.41 33.9 8.49 0 3.16 5.66 14.1 8.49 9.9 16.3 6 4 7.21 14 1.41 4.47 5.66 5.66 5.66 15.6 5.66 0 4.24 1.41 11.3 0 22.6 9.06 7.07 1.41 2 21.2 12.7 --> sky = sum_j D_j= 365.357 end{bswap2}: sky = 365.357 obj= 4.80308 C clara(): sample 11 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 87 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 2 7 8 9 10 12 16 17 20 22 23 24 26 27 29 34 35 36 37 38 41 44 47 48 49 50 51 52 53 54 59 60 61 63 67 68 69 75 76 78 80 81 84 85 -> dysta2() . clara(): s:= max dys[1..946] = 80.9938; clara()'s bswap2(*, s=80.9938): new repr. 19 new repr. 
20 after build: medoids are 19 20 and min.dist dysma[1:n] are 21.2 17 7.28 5.66 1.41 9.22 4.24 14.1 5.1 4.24 0 8.54 7.07 12.6 14.1 33.9 5.66 8.49 0 0 14.1 9.9 13.6 5.83 5.83 9.9 4.47 4.24 17 11.3 5.66 5.66 5.66 5.66 1.41 4.24 8.49 8.25 0 2.83 17 5.1 21.2 1.41 --> sky = sum_j D_j= 368.774 end{bswap2}: sky = 368.774 obj= 4.79624 C clara(): sample 12 finding 1st... new k{ran}: .. kall: T, ... nrx [0:1]= 37 38 ... nsel[0:0]= 44 {295} [ntt=43, nunfs=0] .. nsel[1:44]= 1 2 3 7 11 12 14 18 19 20 21 24 28 30 31 32 33 34 37 38 39 40 42 49 51 52 53 55 59 63 68 69 70 72 74 76 77 78 79 81 84 85 86 87 -> dysta2() . clara(): s:= max dys[1..946] = 80.9938; clara()'s bswap2(*, s=80.9938): new repr. 19 new repr. 24 after build: medoids are 19 24 and min.dist dysma[1:n] are 1.41 21.2 7.07 17 2.83 4.12 9.06 11.3 1.41 6.32 7 5.39 12.7 14.1 1.41 2 8.54 33.9 0 5.83 5.66 5.66 8.49 0 9.9 4.24 14.6 4.24 5.66 5.66 4.24 8.49 11.3 22.6 5.1 0 3.16 5.83 1.41 10 21.2 1.41 7.07 12.7 --> sky = sum_j D_j= 351.374 end{bswap2}: sky = 351.374 obj= 5.11334 C clara(): best sample _found_ ; nbest[1:44] = c(1,5,7,9,11,13,14,15,17,18,22,26,27,28,34,35,36,37,38,43,46,47,48,50,51,54,55,56,57,61,63,64,66,69,71,73,74,75,76,77,78,80,81,82) --> dysta2(nbest), resul(), end Call: clara(x = x, k = 2, samples = 12, trace = 3) Medoids: [,1] [,2] [1,] NA 1 [2,] 47 15 Objective function: 4.79624 Clustering vector: int [1:87] 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 ... Cluster sizes: 74 13 Best sample: [1] 1 5 7 9 11 13 14 15 17 18 22 26 27 28 34 35 36 37 38 43 46 47 48 50 51 [26] 54 55 56 57 61 63 64 66 69 71 73 74 75 76 77 78 80 81 82 Available components: [1] "sample" "medoids" "i.med" "clustering" "objective" [6] "clusinfo" "diss" "call" "silinfo" "data" > > data(xclara) > set.seed(123) > xclara[sample(nrow(xclara), 50),] <- NA > try( clara(xclara, k = 3) ) #-> "nice" error message depicting first 12 missing obs Error in clara(xclara, k = 3) : 50 observations (74,126,137,308,411,423,438,451,642,686,689,735 ...) 
have *only* NAs --> omit them for clustering! > > proc.time() user system elapsed 0.156 0.037 0.248 cluster/tests/sweep-ex.R0000644000175100001440000000411011646600064014772 0ustar hornikusers#### NOT part of the cluster package! #### Find out what exactly sweep() in ../src/spannel.f is doing #### in order to eventually replace it with BLAS calls ! ### subroutine sweep (cov,nord,ixlo,nel,deter) ### =============================== ### is called only once as ### call sweep(cov,ndep,0,i,deter) ### where i in 0:ndep sweep1 <- function(cov, i, det = 1) { ## Purpose: ## ------------------------------------------------------------------------- ## Arguments: ## ------------------------------------------------------------------------- ## Author: Martin Maechler, Date: 22 Jan 2002, 08:58 if(!is.matrix(cov) || 0 != diff(D <- dim(cov))) stop("'cov' must be a square matrix") if((nord <- as.integer(D[1] - 1)) < 1)## cov[0:nord, 0:nord] stop("'cov' must be at least 2 x 2") if(0 > (i <- as.integer(i)) || i > nord) stop("'i' must be in 0:nord, where nord = nrow(cov)-1") storage.mode(cov) <- "double" .C(cluster:::cl_sweep, cov, nord, ixlo = 0:0, i = i, deter=det) } sweepAll <- function(cov, det = 1) { ## Purpose: ## ------------------------------------------------------------------------- ## Arguments: ## ------------------------------------------------------------------------- ## Author: Martin Maechler, Date: 22 Jan 2002, 08:58 if(!is.matrix(cov) || 0 != diff(D <- dim(cov))) stop("'cov' must be a square matrix") if((nord <- as.integer(D[1] - 1)) < 1)## cov[0:nord, 0:nord] stop("'cov' must be at least 2 x 2") storage.mode(cov) <- "double" for(i in 0:nord) { .C(cluster:::cl_sweep, cov, nord, ixlo = 0:0, i = i, deter = det, DUP = FALSE) # i.e. 
work on 'cov' and 'det' directly if(det <= 0) cat("** i = ", i, "; deter = ", format(det)," <= 0\n",sep="") } list(cov = cov, deter = det) } require(cluster) ## Examples with errors m1 <- cov(cbind(1, 1:5)) try(sweepAll(m1))# deter = 0; cov[2,2] = Inf ## ok (m2 <- cov(cbind(1:5, c(2:5,1), c(4:2,2,6)))) qr(m2, tol = .001)$rank sweepAll(m2) ## deter = 0 cluster/tests/diana-ex.R0000644000175100001440000000145311674325037014737 0ustar hornikuserslibrary(cluster) options(digits = 6) data(votes.repub) di.votes <- daisy(votes.repub) .p00 <- proc.time() summary(diana(votes.repub, metric = "manhattan", stand = TRUE)) summary(diana(di.votes, keep.diss = FALSE)) cat('Time elapsed: ', proc.time() - .p00,'\n') data(agriculture) data(ruspini) .p0 <- proc.time() dia.agr <- diana(agriculture) drusp0 <- diana(ruspini, keep.diss=FALSE, keep.data=FALSE) drusp1 <- diana(ruspini, metric = "manhattan") cat('Time elapsed: ', proc.time() - .p0,'\n') summary(dia.agr) summary(drusp0) summary(drusp1) str (drusp1) ## From system.file("scripts/ch11.R", package = "MASS") data(swiss) swiss.x <- as.matrix(swiss[,-1]) .p1 <- proc.time() dCH <- diana(swiss.x) cat('Time elapsed: ', proc.time() - .p1,'\n') str(as.dendrogram(as.hclust(dCH)))# keep back-compatible cluster/src/0000755000175100001440000000000012553131036012537 5ustar hornikuserscluster/src/spannel.c0000644000175100001440000001005612553131040014340 0ustar hornikusers/* Compute the SPANNing ELlipsoid * ------------------------------ for clusplot.default(*, span = TRUE) * Original spannel.f -- translated by f2c (version 20010821). 
* and f2c-clean,v 1.10 2002/03/28 16:37:27 maechler */ #include #include "cluster.h" #ifdef DEBUG_spannel # include #endif void spannel(int *ncas, /* = number of objects */ int *ndep, /* = number of variables */ double *dat,/* [ncas, 0:ndep] */ double *dstopt, /* = squared distances [1:ncas] */ double *cov,/* matrix [0:ndep, 0:ndep] */ double *varsum, /* [1:ndep] */ double *varss, /* [1:ndep] */ double *prob, /* [1:ncas] */ double *work, /* [0:ndep] */ double *eps, int *maxit, /* = maximal # iterations (and returns #{iter.})*/ int *ierr) { static int c__0 = 0; int it, i, j, k; double dmax, p, deter; int dat_dim1 = *ncas; int cov_dim1 = *ndep + 1; #define COV(i,j) cov[i + j * cov_dim1] #define X(i,j) dat[i + j * dat_dim1] /* X(i,j) : i = 0..(n-1), j = 0,..p often 1..p */ /* Parameter adjustments */ --varss; --varsum; /* When spannel() is called, dat[i,0] are all == 1 -- left unchanged: * Scale Data dat[i,j] to mean = 0 and var{1/n} = 1 -- for j= 1:ndep (not j=0!) */ for (j = 1; j <= *ndep; ++j) { varsum[j] = 0.; varss[j] = 0.; } for (i = 0; i < *ncas; ++i) { for (j = 1; j <= *ndep; ++j) { p = X(i,j); varsum[j] += p; varss [j] += p * p; } } for (j = 1; j <= *ndep; ++j) { double aver = varsum[j] / *ncas, scal = sqrt(varss[j] / *ncas - aver * aver); #ifdef DEBUG_spannel Rprintf("j= %d, scal = %g\n", j, scal); #endif for (i = 0; i < *ncas; ++i) X(i,j) = (X(i,j) - aver) / scal; } p = 1. / (double) (*ncas); for (i = 0; i < *ncas; ++i) prob[i] = p; *ierr = 0; p = (double) (*ndep); /* ---- Repeat { ... 
up to `maxit' times ] */ for(it = 0; it < *maxit; it++) { /* Cov[,] = weighted covariance of dat[,] {weights = prob[]} */ for (j = 0; j <= *ndep; ++j) for (k = 0; k <= j; ++k) COV(k,j) = 0.; for (i = 0; i < *ncas; ++i) { for (j = 0; j <= *ndep; ++j) { work[j] = X(i,j); double tempo = prob[i] * work[j]; for (k = 0; k <= j; ++k) COV(k,j) += tempo * work[k]; } } for (j = 0; j <= *ndep; ++j) for (k = 0; k <= j; ++k) COV(j,k) = COV(k,j); deter = 1.; for (i = 0; i <= *ndep; ++i) { cl_sweep(cov, ndep, &c__0, &i, &deter); if (deter <= 0.) { *ierr = 2; return; } } #ifdef DEBUG_spannel Rprintf(" it= %d; after all sweep()s : deter = %g\n", it, deter); #endif dmax = 0.; for (i = 0; i < *ncas; ++i) { double dist = -1.; for (j = 0; j <= *ndep; ++j) { /* work(j) = - sum_{k=0}^p dat(i,k) * cov(k,j) { = cov(j,k) }, * i.e., work_j = - X[i,] %*% COV[,j] */ double w_j = 0.; for (k = 0; k <= *ndep; ++k) w_j -= COV(j,k) * X(i,k); dist += w_j * X(i,j); } dstopt[i] = dist;/* Dist{opt}_i = -1 - t(X[i,]) %*% COV %*% X[i,] */ if (dmax < dist) dmax = dist; }/* for() : now dmax == max{ dstopt[i] } */ if (dmax <= p + *eps) { /* _converged_ */ *maxit = it; return; } /* else not yet converged */ for (i = 0; i < *ncas; ++i) prob[i] *= (dstopt[i] / p); } return;/* with it == *maxit and no convergence */ } /* spannel */ #undef X /* This is currently also called from R : ../tests/sweep-ex.R * ==> keep pointer args !*/ void cl_sweep(double *cov, int *nord, int *ixlo, int *nel, double *deter) { int i, j, cov_dim1 = *nord + 1; double temp = COV(*nel,*nel); *deter *= temp; if (*deter <= 0.) return; /* singular case -- signaled via *deter */ if (*nord <= 1) { COV(1,1) = 1. 
/ temp; } else { /* nord > 1 */ for (i = *ixlo; i <= *nord; ++i) if (i != *nel) { for (j = *ixlo; j <= i; ++j) if (j != *nel) { COV(j,i) = COV(i,j) - COV(i,*nel) * COV(*nel,j) / temp; COV(i,j) = COV(j,i); } } COV(*nel,*nel) = 1.; for (i = *ixlo; i <= *nord; ++i) { COV(*nel,i) = -COV(i,*nel) / temp; COV(i,*nel) = COV(*nel,i); } } return; } /* cl_sweep */ #undef COV cluster/src/pam.c0000644000175100001440000005577712553131040013500 0ustar hornikusers/* * PAM := Partitioning Around Medoids * * original Id: pam.f,v 1.16 2003/06/03 13:40:56 maechler translated by * f2c (version 20031025) and run through f2c-clean,v 1.10 2002/03/28 */ #include #include #include #include /* for diagnostics */ #include /* for interrupting */ #include "cluster.h" #include "ind_2.h" // carries out a clustering using the k-medoid approach // The .C() version --- no longer used, since Jan.2015 void cl_pam(int *nn, int *p, int *kk, double *x, double *dys, int *jdyss, /* jdyss = 0 : compute distances from x * = 1 : distances provided in x */ double *valmd, int *jtmd, int *ndyst, int *nsend, int/*logical*/ *nrepr, int *nelem, double *radus, double *damer, double *avsyl, double *separ, double *ttsyl, double *obj, int *med, int *ncluv, double *clusinf, double *sylinf, int *nisol, int* pamonce) { int clusinf_dim1 = *kk; /* Local variables */ Rboolean all_stats = (obj[0] == 0.),// all_stats == !cluster.only med_given = (med[0] != 0),/* if true, med[] contain initial medoids */ do_swap = (nisol[0] != 0); int k, i, nhalf, trace_lev = (int) obj[1]; double s; /* Function Body */ nhalf = *nn * (*nn - 1) / 2 + 1; /* nhalf := #{distances}+1 = length(dys) */ if (*jdyss != 1) { int jhalt = 0; if(trace_lev) Rprintf("C pam(): computing %d dissimilarities from %d x %d matrix: ", nhalf, *nn, *p); F77_CALL(dysta)(nn, p, x, dys, ndyst, jtmd, valmd, &jhalt); if (jhalt != 0) { if(trace_lev) Rprintf(" dysta()-error: jhalt=%d\n", jhalt); *jdyss = -1; return; } // else if(trace_lev) Rprintf("[Ok]\n"); } /* s := max( 
dys[.] ), the largest distance */ for (i = 1, s = 0.; i < nhalf; ++i) /* dys[0] == 0. not used here */ if (s < dys[i]) s = dys[i]; /* FIXME: work with med[] = (i_1, i_2, ..., i_k) * ----- instead nrepr[] = (b_1, ... b_n) b_i in {0,1} */ for (i = 0; i < *nn; ++i) nrepr[i] = 0; if(med_given) { /* if true, med[] contain initial medoids */ /* for the moment, translate these to nrepr[] 0/1 : * not assuming that the med[] indices are sorted */ for (k = 0; k < *kk; k++) nrepr[med[k] - 1] = 1; } /* Build + Swap [but no build if(med_given); swap only if(do_swap) : */ bswap(*kk, *nn, nrepr, med_given, do_swap, trace_lev, radus, damer, avsyl, dys, s, obj, *pamonce); if(trace_lev) Rprintf("end{bswap()}, "); /* Compute Clustering & STATs if(all_stats): */ cstat(*kk, *nn, nsend, nrepr, all_stats, radus, damer, avsyl, separ, &s, dys, ncluv, nelem, med, nisol); if(trace_lev) Rprintf("end{cstat()}\n"); if(all_stats) { for (k = 0; k < *kk; ++k) { clusinf[k]= (double) nrepr[k]; clusinf[k + clusinf_dim1] = radus[k]; clusinf[k + (clusinf_dim1 << 1)] = avsyl[k]; clusinf[k + clusinf_dim1 * 3] = damer[k]; clusinf[k + (clusinf_dim1 << 2)] = separ[k]; } if (1 < *kk && *kk < *nn) { /* Compute Silhouette info : */ dark(*kk, *nn, ncluv, dys, s, // --> nsend, nelem, nrepr, radus, damer, avsyl, ttsyl, sylinf); } } } /* cl_pam */ // The .Call() version SEXP cl_Pam(SEXP k_, SEXP n_, SEXP do_diss_, /* == !diss; if true, compute distances from x (= x_or_diss); otherwise distances provided by x_or_diss */ SEXP x_or_diss,// this "is" if(do_diss) "x[]" (n x p) else "dys[]" SEXP all_stats_, // all_stats == !cluster.only SEXP medoids, // NULL or integer(k) subset {1:n} SEXP do_swap_, SEXP trace_lev_, SEXP keep_diss_, SEXP pam_once_, // the next 3 are only needed if(do_diss) SEXP val_md, SEXP j_md, // "md" := [m]issing [d]ata SEXP dist_kind) // = 1 ("euclidean") or 2 ("manhattan") { const int kk = asInteger(k_), n = asInteger(n_), pam_once = asInteger(pam_once_), trace_lev = asInteger(trace_lev_); const 
Rboolean all_stats = asLogical(all_stats_) , med_given = (medoids != R_NilValue) // if true, med[] contain initial medoids , do_diss = asLogical(do_diss_) , do_swap = asLogical(do_swap_) , keep_diss = asLogical(keep_diss_) // only if(keep_diss) return dys[] .. , do_syl = all_stats && (1 < kk && kk < n); #ifdef once_we_get_n_from_args int n, p = NA_INTEGER; if (do_diss) { // <-- was 'jdyss != 1' i.e. jdyss == 0 SEXP dims = getAttrib(x_or_diss, R_DimSymbol); n = INTEGER(dims)[0]; p = INTEGER(dims)[1]; } else { n = asInteger(getAttrib(x_or_diss, install("Size"))); } #endif int i, nhalf = n * (n - 1) / 2 + 1; // nhalf := #{distances}+1 = length(dys) double s; int *nsend = (int*) R_alloc(n, sizeof(int)) , *nelem = (int*) R_alloc(all_stats ? n : 1, sizeof(int)) /* Rboolean */ , *nrepr = (int*) R_alloc(n, sizeof(int)) , *med ; double *radus = (double*) R_alloc( n, sizeof(double)), *damer = (double*) R_alloc( n, sizeof(double)), *separ = (double*) R_alloc(kk, sizeof(double)); int clusinf_dim1 = kk; if(med_given) { if(TYPEOF(medoids) != INTSXP || LENGTH(medoids) != kk) error(_("Invalid 'medoids'")); PROTECT(medoids = duplicate(medoids)); } else { PROTECT(medoids = allocVector(INTSXP, kk)); } med = INTEGER(medoids); SEXP nms, ans = PROTECT(allocVector(VECSXP, keep_diss ? 9 : 9-1)); setAttrib(ans, R_NamesSymbol, nms = allocVector(STRSXP, keep_diss ? 9 : 9-1)); int nprot = 2; // <- ++ for each PROTECT() below SEXP dys_, avsyl_, obj_, clu_, clusinf_, sylinf_, nisol_,ttsyl_; // these are only used if(do_diss) : double *valmd; int *jtmd; int *ndyst; if (do_diss) { // <-- was 'jdyss != 1' i.e. 
jdyss == 0 PROTECT(dys_ = allocVector(REALSXP, nhalf)); nprot++; valmd = REAL(val_md); jtmd = INTEGER(j_md); ndyst= INTEGER(dist_kind); // = 1 ("euclidean") or 2 ("manhattan") } else { dys_ = x_or_diss; // a pointer to the same thing } // Creating the SEXPs as list components, so they are auto-PROTECTed: SET_STRING_ELT(nms, 0, mkChar("clu")); SET_VECTOR_ELT(ans, 0, clu_ = allocVector(INTSXP, n)); SET_STRING_ELT(nms, 1, mkChar("med")); SET_VECTOR_ELT(ans, 1, medoids); SET_STRING_ELT(nms, 2, mkChar("silinf")); if(do_syl) SET_VECTOR_ELT(ans, 2, sylinf_ = all_stats ? allocMatrix(REALSXP, n, 4) : allocVector(REALSXP, 1)); SET_STRING_ELT(nms, 3, mkChar("obj")); SET_VECTOR_ELT(ans, 3, obj_ = allocVector(REALSXP, 2)); SET_STRING_ELT(nms, 4, mkChar("isol")); SET_VECTOR_ELT(ans, 4, nisol_ = allocVector(INTSXP, all_stats ? kk : 1)); SET_STRING_ELT(nms, 5, mkChar("clusinf")); SET_VECTOR_ELT(ans, 5, clusinf_ = all_stats ? allocMatrix(REALSXP, kk, 5) : allocVector(REALSXP, 1)); SET_STRING_ELT(nms, 6, mkChar("avsil")); SET_VECTOR_ELT(ans, 6, avsyl_ = allocVector(REALSXP, n)); SET_STRING_ELT(nms, 7, mkChar("ttsil")); if(do_syl) SET_VECTOR_ELT(ans, 7, ttsyl_ = allocVector(REALSXP, 1)); if(keep_diss) { SET_STRING_ELT(nms, 8, mkChar("dys")); SET_VECTOR_ELT(ans, 8, dys_); } int *ncluv = INTEGER(clu_), *nisol = INTEGER(nisol_); double *dys = REAL(dys_), *avsyl = REAL(avsyl_), *obj = REAL(obj_), *clusinf= REAL(clusinf_); if (do_diss) { // <-- was 'jdyss != 1' i.e. jdyss == 0 double *x = REAL(x_or_diss); int jhalt = 0; SEXP dims = getAttrib(x_or_diss, R_DimSymbol); int p = INTEGER(dims)[1]; if(trace_lev) Rprintf("C pam(): computing %d dissimilarities from %d x %d matrix: ", nhalf, n, p); F77_CALL(dysta)((int*)&n, &p, x, dys, ndyst, jtmd, valmd, &jhalt); if (jhalt != 0) { if(trace_lev) Rprintf(" dysta()-error: jhalt=%d\n", jhalt); return ScalarInteger(jhalt); // i.e., integer error code instead of a named list } // else if(trace_lev) Rprintf("[Ok]\n"); } /* s := max( dys[.] 
), the largest distance */ for (i = 1, s = 0.; i < nhalf; ++i) /* dys[0] == 0. not used here */ if (s < dys[i]) s = dys[i]; /* FIXME: work with med[] = (i_1, i_2, ..., i_k) * ----- instead nrepr[] = (b_1, ... b_n) b_i in {0,1} */ for (i = 0; i < n; ++i) nrepr[i] = 0; if(med_given) { /* if true, med[] contain initial medoids */ /* for the moment, translate these to nrepr[] 0/1 : * not assuming that the med[] indices are sorted */ for (int k = 0; k < kk; k++) nrepr[med[k] - 1] = 1; } /* Build + Swap [but no build if(med_given); swap only if(do_swap) : */ bswap(kk, n, nrepr, // <- 3 med_given, do_swap, trace_lev,// <- 6 radus, damer, avsyl, // <- 9 dys, s, obj, // <- 12 pam_once); if(trace_lev) Rprintf("end{bswap()}, "); /* Compute Clustering & STATs if(all_stats): */ cstat(kk, n, nsend, nrepr, all_stats, radus, damer, avsyl, separ, &s, dys, ncluv, nelem, med, nisol); if(trace_lev) Rprintf("end{cstat()}\n"); if(all_stats) { for (int k = 0; k < kk; ++k) { clusinf[k]= (double) nrepr[k]; clusinf[k + clusinf_dim1] = radus[k]; clusinf[k + (clusinf_dim1 << 1)] = avsyl[k]; clusinf[k + clusinf_dim1 * 3] = damer[k]; clusinf[k + (clusinf_dim1 << 2)] = separ[k]; } if (do_syl) { // Compute Silhouette info : double *ttsyl = REAL(ttsyl_), *sylinf = REAL(sylinf_); dark(kk, n, ncluv, dys, s, // --> nsend, nelem, nrepr, radus, damer, avsyl, ttsyl, sylinf); } } UNPROTECT(nprot); return ans; } /* cl_Pam */ /* ----------------------------------------------------------- bswap(): the clustering algorithm in 2 parts: I. build, II. 
swap */ void bswap(int kk, int n, int *nrepr, Rboolean med_given, Rboolean do_swap, int trace_lev, /* nrepr[]: here is boolean (0/1): 1 = "is representative object" */ double *dysma, double *dysmb, double *beter, const double dys[], double s, double *obj, int pamonce) { int i, j, ij, k,h, dig_n; double sky; /* Parameter adjustments */ --nrepr; --beter; --dysma; --dysmb; if(trace_lev) Rprintf("pam()'s bswap(*, s=%g, pamonce=%d): ", s, pamonce); s = s * 1.1 + 1.;// larger than all dys[] (but DBL_MAX is too large) /* IDEA: when n is large compared to k (= kk), * ---- rather use a "sparse" representation: * instead of boolean vector nrepr[] , use ind_repr <- which(nrepr) !! */ for (i = 1; i <= n; ++i) dysma[i] = s; if(med_given) { if(trace_lev) Rprintf("medoids given\n"); /* compute dysma[] : dysma[j] = D(j, nearest_representative) */ for (i = 1; i <= n; ++i) { if (nrepr[i] == 1) for (j = 1; j <= n; ++j) { ij = ind_2(i, j); if (dysma[j] > dys[ij]) dysma[j] = dys[ij]; } } } else { /* ====== first algorithm: BUILD. ====== */ if(trace_lev) Rprintf("build %d medoids:\n", kk); /* find kk representatives aka medoids : */ for (k = 1; k <= kk; ++k) { R_CheckUserInterrupt(); /* compute beter[i] for all non-representatives: * also find ammax := max_{..} and nmax := argmax_i{beter[i]} ... */ int nmax = -1; /* -Wall */ double ammax, cmd; ammax = 0.; for (i = 1; i <= n; ++i) { if (nrepr[i] == 0) { beter[i] = 0.; for (j = 1; j <= n; ++j) { cmd = dysma[j] - dys[ind_2(i, j)]; if (cmd > 0.) beter[i] += cmd; } if (ammax <= beter[i]) { /* does < (instead of <= ) work too? -- NO! */ ammax = beter[i]; nmax = i; } } } nrepr[nmax] = 1;/* = .true. : found new representative */ if (trace_lev >= 2) Rprintf(" new repr. %d\n", nmax); /* update dysma[] : dysma[j] = D(j, nearest_representative) */ for (j = 1; j <= n; ++j) { ij = ind_2(nmax, j); if (dysma[j] > dys[ij]) dysma[j] = dys[ij]; } } /* output of the above loop: nrepr[], dysma[], ... */ } if(trace_lev) /* >= 2 (?) 
*/ { dig_n = 1+floor(log10(n)); Rprintf(" after build: medoids are"); for (i = 1; i <= n; ++i) if(nrepr[i] == 1) Rprintf(" %*d", dig_n, i); if(trace_lev >= 3) { Rprintf("\n and min.dist dysma[1:n] are\n"); for (i = 1; i <= n; ++i) { Rprintf(" %6.3g", dysma[i]); if(i % 10 == 0) Rprintf("\n"); } if(n % 10 != 0) Rprintf("\n"); } else Rprintf("\n"); } else dig_n = 1;// -Wall sky = 0.; for (j = 1; j <= n; ++j) sky += dysma[j]; obj[0] = sky / n; if (do_swap && (kk > 1 || med_given)) { double dzsky; int hbest = -1, nbest = -1, kbest= -1; // -Wall int *medoids, *clustmembership; double *fvect; if(pamonce) { // +1 --> use 1-based indices (as R) medoids = (int*) R_alloc(kk+1, sizeof(int)); clustmembership = (int*) R_alloc(n+1, sizeof(int)); fvect = (double*) R_alloc(n+1, sizeof(double)); for (int k = 1, i = 1; i <= n; ++i) { if (nrepr[i]) { medoids[k] = i; k++; } } } else { // -Wall : clustmembership = medoids = (int*) NULL; fvect = (double*) NULL; } /* ====== second algorithm: SWAP. ====== */ /* Hmm: In the following, we RE-compute dysma[]; * don't need it first time; then only need *update* after swap */ /*-- Loop : */ L60: if(pamonce == 0) { // original algorihtm for (j = 1; j <= n; ++j) { /* dysma[j] := D_j d(j, ) [KR p.102, 104] * dysmb[j] := E_j d(j, <2-nd cl.medi>) [p.103] */ dysma[j] = s; dysmb[j] = s; for (i = 1; i <= n; ++i) { if (nrepr[i]) { ij = ind_2(i, j); if (dysma[j] > dys[ij]) { dysmb[j] = dysma[j]; dysma[j] = dys[ij]; } else if (dysmb[j] > dys[ij]) { dysmb[j] = dys[ij]; } } } } } else { // pamonce == 1 or == 2 : for (j = 1; j <= n; ++j) { /* dysma[j] := D_j d(j, ) [KR p.102, 104] * dysmb[j] := E_j d(j, <2-nd cl.medi>) [p.103] */ dysma[j] = s; dysmb[j] = s; for(k = 1; k <= kk; k++) { i = medoids[k]; ij = ind_2(i, j); if (dysma[j] > dys[ij]) { //store cluster membership clustmembership[j] = i; dysmb[j] = dysma[j]; dysma[j] = dys[ij]; } else if (dysmb[j] > dys[ij]) { dysmb[j] = dys[ij]; } } } } dzsky = 1.; /* 1 is arbitrary > 0; only dzsky < 0 matters in the 
end */ if(pamonce == 0) { // original algorihtm for (h = 1; h <= n; ++h) if (!nrepr[h]) { R_CheckUserInterrupt(); for (i = 1; i <= n; ++i) if (nrepr[i]) { double dz = 0.; /* dz := T_{ih} := sum_j C_{jih} [p.104] : */ for (j = 1; j <= n; ++j) { /* if (!nrepr[j]) { */ int hj = ind_2(h, j); ij = ind_2(i, j); if (dys[ij] == dysma[j]) { double small = dysmb[j] > dys[hj] ? dys[hj] : dysmb[j]; dz += (- dysma[j] + small); } else if (dys[hj] < dysma[j]) /* 1c. */ dz += (- dysma[j] + dys[hj]); } if (dzsky > dz) { dzsky = dz; /* dzsky := min_{i,h} T_{i,h} */ hbest = h; nbest = i; } } } } else { // pamonce == 1 or == 2 : for(k = 1; k <= kk; k++) { R_CheckUserInterrupt(); i=medoids[k]; double removeCost = 0.; //Compute cost for removing the medoid for (j = 1; j <= n; ++j) { if(clustmembership[j] == i) { removeCost+=(dysmb[j]-dysma[j]); fvect[j]=dysmb[j]; } else{ fvect[j]=dysma[j]; } } if (pamonce == 1) { // Now check possible new medoids h for (h = 1; h <= n; ++h) if (!nrepr[h]) { double addGain = removeCost; // Compute gain of adding h as a medoid: for (j = 1; j <= n; ++j) { int hj = ind_2(h, j); if(dys[hj] < fvect[j]) addGain += (dys[hj]-fvect[j]); } if (dzsky > addGain) { dzsky = addGain; /* dzsky := min_{i,h} T_{i,h} */ hbest = h; nbest = i; kbest = k; } } } else { // pamonce == 2 : // Now check possible new medoids h for (h = 1; h <= n; ++h) if (!nrepr[h]) { double addGain = removeCost - fvect[h]; // - fvect[h] since dys[h,h]=0; // Compute gain of adding h as a medoid: int ijbase = (h-2)*(h-1)/2; for (j = 1; j < h; ++j) { int hj = ijbase+j; if(dys[hj] < fvect[j]) addGain += (dys[hj]-fvect[j]); } ijbase += h;// = (h-2)*(h-1)/2 + h for (j = h+1; j <= n; ++j) { ijbase += j-2; if(dys[ijbase] < fvect[j]) addGain += (dys[ijbase]-fvect[j]); } if (dzsky > addGain) { dzsky = addGain; /* dzsky := min_{i,h} T_{i,h} */ hbest = h; nbest = i; kbest = k; } } } } } if (dzsky < - 16*DBL_EPSILON * fabs(sky)) { // basically " < 0 ", // but ' < 0 ' gave infinite loop, swapping the identical 
objects // found an improving swap if(trace_lev >= 2) Rprintf( " swp new %*d <-> %*d old; decreasing diss. %7g by %g\n", dig_n, hbest, dig_n, nbest, sky, dzsky); nrepr[hbest] = 1; nrepr[nbest] = 0; if(pamonce) medoids[kbest]=hbest; sky += dzsky; goto L60; } } obj[1] = sky / n; } /* bswap */ /* ----------------------------------------------------------- cstat(): Compute STATistics (numerical output) concerning each partition */ void cstat(int kk, int nn, int *nsend, int *nrepr, Rboolean all_stats, double *radus, double *damer, double *avsyl, double *separ, double *s, double *dys, int *ncluv, int *nelem, int *med, int *nisol) { int j, k, ja, jk, nplac, ksmal = -1/* -Wall */; double ss = *s * 1.1 + 1.; /* Parameter adjustments */ --ncluv; --nrepr; --nsend; /* nsend[j] := i, where x[i,] is the medoid to which x[j,] belongs */ for (j = 1; j <= nn; ++j) { if (nrepr[j] == 0) { double dsmal = ss; for (k = 1; k <= nn; ++k) { if (nrepr[k] == 1) { int kj_ = ind_2(k, j); if (dsmal > dys[kj_]) { dsmal = dys[kj_]; ksmal = k; } } } nsend[j] = ksmal; } else { nsend[j] = j; } } /* ncluv[j] := k , the cluster number (k = 1..kk) */ jk = 1; nplac = nsend[1]; for (j = 1; j <= nn; ++j) { ncluv[j] = 0; if (nsend[j] == nplac) ncluv[j] = 1; } for (ja = 2; ja <= nn; ++ja) { nplac = nsend[ja]; if (ncluv[nplac] == 0) { ++jk; for (j = 2; j <= nn; ++j) { if (nsend[j] == nplac) ncluv[j] = jk; } if (jk == kk) break; } } if(all_stats) { /* analysis of the clustering. 
*/ int numl; --avsyl; // <-> [1]-indexing --damer; --med; --nelem; --nisol; --radus; --separ; for (k = 1; k <= kk; ++k) { int ntt = 0, m = -1/* -Wall */; double ttt = 0.; radus[k] = -1.; R_CheckUserInterrupt(); for (j = 1; j <= nn; ++j) { if (ncluv[j] == k) { double djm; ++ntt; m = nsend[j]; nelem[ntt] = j; djm = dys[ind_2(j, m)]; ttt += djm; if (radus[k] < djm) radus[k] = djm; } } if(ntt == 0) error(_("pam(): Bug in C level cstat(), k=%d: ntt=0"), k); avsyl[k] = ttt / ntt; med[k] = m; } if (kk == 1) { damer[1] = *s; nrepr[1] = nn; nisol[1] = 0; separ[1] = 0.; return; } /* ELSE kk > 1 : */ /* numl = number of L-clusters. */ numl = 0; for (k = 1; k <= kk; ++k) { /* identification of cluster k: nelem= vector of object indices, nel = number of objects */ int nel = 0; R_CheckUserInterrupt(); for (j = 1; j <= nn; ++j) { if (ncluv[j] == k) { ++nel; nelem[nel] = j; } } nrepr[k] = nel; if (nel == 1) { int nvn = nelem[1]; damer[k] = 0.; separ[k] = ss; for (j = 1; j <= nn; ++j) { if (j != nvn) { int mevj = ind_2(nvn, j); if (separ[k] > dys[mevj]) separ[k] = dys[mevj]; } } /* Is cluster k 1) an L-cluster or 2) an L*-cluster ? */ if (separ[k] == 0.) 
++numl; } else { /* nel != 1 : */ double dam = -1., sep = ss; Rboolean kand = TRUE; for (ja = 1; ja <= nel; ++ja) { int jb, nvna = nelem[ja]; double aja = -1., ajb = ss; for (jb = 1; jb <= nn; ++jb) { int jndz = ind_2(nvna, jb); if (ncluv[jb] == k) { if (aja < dys[jndz]) aja = dys[jndz]; } else { if (ajb > dys[jndz]) ajb = dys[jndz]; } } if (kand && aja >= ajb) kand = FALSE; if (dam < aja) dam = aja; if (sep > ajb) sep = ajb; } separ[k] = sep; damer[k] = dam; if (kand) { ++numl; if (dam >= sep) /* L-cluster */ nisol[k] = 1; else/* L*-cluster */ nisol[k] = 2; continue /* k */; } } /* nel = 1 or (!kand) : */ nisol[k] = 0; }/* for(k) */ } /* all_stats */ } /* cstat */ /* ----------------------------------------------------------- Compute Silhouette Information : */ void dark( // input: int kk, int nn, const int ncluv[], const double dys[], double s, // output: int *nsend, int *nelem, int *negbr, double *syl, double *srank, double *avsyl, double *ttsyl, double *sylinf) { int k, nsylr; /* pointers to sylinf[] columns -- sylinf[nn, 4] : */ double *sylinf_2, *sylinf_3, *sylinf_4; sylinf_2 = sylinf + nn; sylinf_3 = sylinf_2 + nn; sylinf_4 = sylinf_3 + nn; /* Parameter adjustments */ --avsyl; --ncluv; nsylr = 0; *ttsyl = 0.; for (k = 1; k <= kk; ++k) { /* nelem[0:(ntt-1)] := indices (1-based) of obs. in cluster k : */ int j,l, ntt = 0; for (j = 1; j <= nn; ++j) { if (ncluv[j] == k) { nelem[ntt] = j; ++ntt; } } for (j = 0; j < ntt; ++j) {/* (j+1)-th obs. 
in cluster k */ int k_, nj = nelem[j]; double dysb = s * 1.1 + 1.; negbr[j] = -1; /* for all clusters k_ != k : */ for (k_ = 1; k_ <= kk; ++k_) if (k_ != k) { double db = 0.; int nbb = 0; for (l = 1; l <= nn; ++l) if (ncluv[l] == k_) { ++nbb; if (l != nj) db += dys[ind_2(nj, l)]; } db /= nbb; /* now db(k_) := mean( d[j, l]; l in C_{k_} ) */ if (dysb > db) { dysb = db; negbr[j] = k_; } }/* negbr[j] := arg max_{k_} db(k_) */ if (ntt > 1) { double dysa = 0.; for (l = 0; l < ntt; ++l) { int nl = nelem[l]; if (nj != nl) dysa += dys[ind_2(nj, nl)]; } dysa /= ntt - 1; if (dysa > 0.) { if (dysb > 0.) { if (dysb > dysa) syl[j] = 1. - dysa / dysb; else if (dysb < dysa) syl[j] = dysb / dysa - 1.; else /* dysb == dysa: */ syl[j] = 0.; if (syl[j] < -1.) syl[j] = -1.; else if (syl[j] > 1.) syl[j] = 1.; } else { syl[j] = -1.; } } else /* dysa == 0 */ if (dysb > 0.) syl[j] = 1.; else syl[j] = 0.; } else { /* ntt == 1: */ syl[j] = 0.; } } /* for( j ) */ avsyl[k] = 0.; if (ntt == 0) /* this can happen when medoids are user-specified !*/ continue; /* next k */ for (j = 0; j < ntt; ++j) { int lang=-1 /*Wall*/; double symax = -2.; for (l = 0; l < ntt; ++l) { if (symax < syl[l]) { symax = syl[l]; lang = l; } } nsend[j] = lang; srank[j] = symax; /* = syl[lang] */ avsyl[k] += srank[j]; syl[lang] = -3.; } *ttsyl += avsyl[k]; avsyl[k] /= ntt; if (ntt == 1) { sylinf [nsylr] = (double) k; sylinf_2[nsylr] = (double) negbr[0]; sylinf_3[nsylr] = 0.; sylinf_4[nsylr] = (double) nelem[0]; ++nsylr; } else { for (j = 0; j < ntt; ++j) { int lplac = nsend[j]; sylinf [nsylr] = (double) k; sylinf_2[nsylr] = (double) negbr[lplac]; sylinf_3[nsylr] = srank[j]; sylinf_4[nsylr] = (double) nelem[lplac]; ++nsylr; } } } /* for (k) */ *ttsyl /= nn; } /* dark */ cluster/src/sildist.c0000644000175100001440000000326312553131040014355 0ustar hornikusers/* Donated by Francois Romain */ #include /* fmax2() */ #include #include "cluster.h" void sildist(double *d, /* distance : in matrix or dist format; i.e., of length 
n^2 or n*(n-1)/2; see 'ismat' */ int *n, /* number of Subjects (attr(d,'Size')) */ int *clustering,/* clustering vector, values from {1..k} */ int *k, /* number of clusters */ double *diC, /* diC */ int *counts, /* counts[k] := #{cluster k} */ double *si, /* (a_i - b_i) / max(ai,bi) */ int *neighbor, /* neighbor */ int *ismat) /* boolean : is 'd' a matrix (1) or a dist vector (0) */ { int i,j,l, ci; Rboolean computeSi ; /* do we compute si[i] */ double ai, bi ; for(i = 0, l = 0; i < *n; i++) { ci = clustering[i] - 1; counts[ci]++; if(*ismat) l = (*n)*i + i+1; for(j = i+1; j < *n; j++, l++) { int cj = clustering[j]-1; diC[(*k)*i + cj] += d[l]; diC[(*k)*j + ci] += d[l]; } } for(i = 0; i < *n; i++) { int ki = (*k)*i; ci = clustering[i] - 1; computeSi = TRUE; for(j=0; j < *k; j++) { if(j == ci) { if(counts[j] == 1) /* only one subject in the cluster */ computeSi = FALSE; else diC[ki + j] /= (counts[j]-1); } else { diC[ki + j] /= counts[j]; } } ai = diC[ki+ci]; /* bi = min_C diC : */ if(ci == 0) { /* cluster #1 */ bi = diC[ki+1]; neighbor[i] = 2; } else { bi = diC[ki]; neighbor[i] = 1; } for(j = 1; j < *k; j++) if(j != ci) { if(bi > diC[ki + j]) { bi = diC[ki + j]; neighbor[i] = j+1; } } si[i] = (computeSi && (bi != ai)) ? 
(bi - ai) / fmax2(ai, bi) : 0.; } } cluster/src/daisy.f0000644000175100001440000001004612553131040014013 0ustar hornikusers subroutine cldaisy(nn,jpp,x,valmd,weights, + jtmd,jdat,vtype,ndyst,mdata,disv) c c c c Calculating dissimilarities between objects or variables c c integer nn, jpp c c nn = number of objects c c jpp = number of variables used for the calculations c c The following vectors and matrices must be dimensioned in the c c main program : double precision x(nn,jpp), valmd(jpp), weights(jpp) double precision disv(1+nn*(nn-1)/2) integer jtmd(jpp), jdat, vtype(jpp), ndyst, mdata c vtype was character originally c vtype(j) is the type of variable j: c = 1 (A) for an Asymmetric binary variable c = 2 (S) for a Symmetric binary variable c = 3 (N) for a Nominal variable c = 4 (O) for an Ordinal variable c = 5 (I) for an Interval variable (additive) c = 6 (T) for a raTio variable (log transformed) c vector jtmd is only read if there are missing values : if(mdata) c jtmd(j) = 0 if variable j is binary c = -1 if variable j is not binary and has missing values c = +1 if variable j is not binary and has no missing values c VAR double precision clk,dlk, pp,ppa, rpres integer j,k,l,la, lsubt, nlk, nbad, npres logical hasNA hasNA = (mdata .ne. 0) c calculation of the dissimilarities nlk=0 if(jdat .eq. 1) then c Case I: `mixed' type variables nbad=0 do 450 l=2,nn la=l-1 do 440 k=1,la nlk=nlk+1 ppa=0. dlk=0. c Dissimilarity between obs. l & k do 420 j=1,jpp if(vtype(j) .ge. 3) then if (hasNA) then if(jtmd(j).lt.0) then if(x(l,j).eq.valmd(j)) goto 420 if(x(k,j).eq.valmd(j)) goto 420 endif endif ppa=ppa + weights(j) if(vtype(j).eq.3) then if(x(l,j).ne.x(k,j)) dlk=dlk+ weights(j) else dlk=dlk+ weights(j)*dabs(x(l,j)-x(k,j)) endif else c binary variable x(*,j) if(x(l,j).ne.0..and.x(l,j).ne.1.) goto 420 if(x(k,j).ne.0..and.x(k,j).ne.1.) 
goto 420 if(vtype(j).eq.2.or.x(l,j).ne.0.or.x(k,j).ne.0) * ppa=ppa+weights(j) if(x(l,j).ne.x(k,j)) dlk=dlk+ weights(j) endif 420 continue if(ppa.le.0.5) then nbad=nbad+1 disv(nlk)=-1 else disv(nlk)=dlk/ppa endif 440 continue 450 continue else c Case II : jdat != 1: all variables are interval scaled c ------- ~~~~~~~~~ { basically === dysta() in ./dysta.f c FIXME: common code! } pp=jpp do 600 l=2,nn lsubt=l-1 do 520 k=1,lsubt clk=0.0 nlk=nlk+1 npres=0 do 530 j=1,jpp if (hasNA) then if(jtmd(j).lt.0) then if(x(l,j).eq.valmd(j)) goto 530 if(x(k,j).eq.valmd(j)) goto 530 endif endif npres=npres+1 if(ndyst.eq.1) then clk=clk+ (x(l,j)-x(k,j))*(x(l,j)-x(k,j)) else clk=clk+ dabs(x(l,j)-x(k,j)) endif 530 continue rpres=npres if(npres.eq.0)then disv(nlk)=-1.0 else if(ndyst.eq.1) then disv(nlk)=dsqrt(clk*(pp/rpres)) else disv(nlk)=clk*(pp/rpres) endif 520 continue 600 continue endif end cluster/src/mona.f0000644000175100001440000001345412553131040013642 0ustar hornikusers subroutine clmona(nn,pp, x,jerr, nban,ner,kwan,lava, jlack) cc cc MONothetic Analysis cc cc Program for divisive hierarchical clustering of binary data, cc using association analysis. cc cc list of functions and subroutines: cc function kab c Args integer nn, pp, jerr c nn = number of objects c pp = number of variables c jerr : error return code in {1,2,3,4} integer x(nn,pp), jlack(pp), nban(nn),ner(nn),kwan(nn),lava(nn) c x[i,j]: binary (0/1/NA) data (obs. i, var.j) c where "NA", missing values, are all values > 1 c Function called: integer kab c VARs logical syn integer j, ja, jb, jnat, jma, jtel, jtelz, jtel2, jres integer j0,j1, jptwe integer a, b, c, d, k, ka, kb, kal, kalf, km integer l, lbb, laa, lcc, ldd, lee, lack,lama, lams integer nel, nelbb, nzf, nhalf, npass, nclu, myst, mysca c--begin{-Wall} a=0 b=0 c=0 d=0 jma=0 jtel=0 jtelz=0 lcc=0 nelbb=0 c-- end{-Wall} nhalf=(nn+1)/2 jptwe=(pp+4)/5 myst=0 do 70 l=1,nn mysca=0 do 60 j=1,pp if(x(l,j) .gt. 1) mysca=mysca+1 60 continue if(mysca .eq. 
pp) then c all variables missing for this object jerr=1 return endif myst=myst+mysca 70 continue if(myst.eq.0)go to 290 lack=0 do 100 j=1,pp j0=0 j1=0 do 80 l=1,nn if(x(l,j).eq.0) j0=j0+1 if(x(l,j).eq.1) j1=j1+1 80 continue jlack(j)=nn-j0-j1 if(jlack(j).ne.0) lack=lack+1 if(jlack(j).ge.nhalf) then c at least 50% of the objects have missing values for this variable jerr=2 return endif if(j0.eq.0 .or. j1.eq.0) then c all non missing values are identical for this variable jerr=3 return endif 100 continue if(lack .eq. pp) then c all variables have missing values jerr=4 return endif cc cc filling in missing values cc do 260 j=1,pp if(jlack(j) .ne. 0) then lama=-1 syn=.true. do 240 ja=1,pp if(jlack(ja) .eq. 0) then c no missing in x[, ja] a=0 b=0 c=0 d=0 do 230 k=1,nn if(x(k,j).eq.1)go to 220 if(x(k,ja).eq.0)a=a+1 if(x(k,ja).eq.1)b=b+1 go to 230 220 if(x(k,ja).eq.0)c=c+1 if(x(k,ja).eq.1)d=d+1 230 continue kal=a*d - b*c kalf=kab(kal) if(kalf.ge.lama)then lama=kalf jma=ja if(kal.lt.0) syn=.false. endif endif 240 continue do 250 l=1,nn if(x(l,j).gt. 1) then c missing if(syn) then x(l,j)=x(l,jma) else if(x(l,jma).eq.1) x(l,j)=0 if(x(l,jma).eq.0) x(l,j)=1 endif endif 250 continue endif 260 continue c--- end of treating missing values ---- cc cc initialization cc 290 do 300 k=1,nn kwan(k)=0 ner(k)=k lava(k)=0 300 continue npass=1 kwan(1)=nn cc cc algorithm cc nclu=1 ka=1 C --- Loop --- 310 kb=ka+kwan(ka)-1 lama=-1 jnat=pp do 370 j=1,pp if(nclu.eq.1)go to 330 j0=0 j1=0 do 325 l=ka,kb nel=ner(l) if(x(nel,j).eq.0) j0=j0+1 if(x(nel,j).eq.1) j1=j1+1 325 continue if(j1.eq.0)go to 370 if(j0.eq.0)go to 370 330 jnat=jnat-1 lams=0 do 360 jb=1,pp if(jb .ne. j) then a=0 b=0 c=0 d=0 do 350 l=ka,kb nel=ner(l) if(x(nel,j).eq. 0) then if(x(nel,jb).eq.0) a=a+1 if(x(nel,jb).eq.1) b=b+1 else if(x(nel,jb).eq.0) c=c+1 if(x(nel,jb).eq.1) d=d+1 endif 350 continue lams=lams+kab(a*d - b*c) endif 360 continue if(lama .lt. 
lams) then lama=lams jtel =c+d jtelz=a+b jma=j endif 370 continue if(jnat.lt.pp)go to 375 kwan(ka)=-kwan(ka) go to 400 cc cc splitting cc 375 nel=ner(ka) if(x(nel,jma).eq.1)then nzf=0 jtel2=jtel else nzf=1 jtel2=jtelz endif jres=kb-ka+1-jtel2 km=ka+jtel2 l=ka c -- inner loop -- 378 nel=ner(l) if(x(nel,jma).eq.nzf)go to 380 l=l+1 if(l.lt.km)go to 378 go to 390 380 do 381 lbb=l,kb nelbb=ner(lbb) if(x(nelbb,jma) .ne. nzf) then lcc=lbb-1 go to 382 endif 381 continue 382 do 383 laa=l,lcc ldd=lcc+l-laa lee=ldd+1 ner(lee)=ner(ldd) 383 continue ner(l)=nelbb go to 378 390 nclu=nclu+1 nban(km)=npass kwan(ka)=jtel2 kwan(km)=jres lava(km)=jma ka=ka+kwan(ka) 400 if(kb.eq.nn)go to 500 410 ka=ka+kab(kwan(ka)) if(ka.gt.nn)go to 500 if(kwan(ka).lt.2)go to 410 go to 310 500 npass=npass+1 do 510 ka=1,nn if(kwan(ka).ge.2) go to 310 510 continue end cc cc kab(j) = |j| cc integer function kab(j) integer j kab=j if(j.lt.0) kab=-j return end cluster/src/fanny.c0000644000175100001440000003076412553131040014023 0ustar hornikusers/* FANNY : program for Fuzzy cluster ANalysis */ /* was $Id: fanny.c 6869 2015-01-26 13:30:42Z maechler $ * fanny.f -- translated by f2c (version 20020621). * and treated by f2c-clean v 1.10, and manually by Martin Maechler */ #include #include /* for diagnostics */ #include "cluster.h" /* dysta3() is in cluster.h ! 
*/ static void fuzzy(int nn, int k, double *p, double *dp, double *pt, double *dss, double *esp, double *ef, double *obj, double r, double tol, int *nit, int trace_lev); static void caddy(int nn, int k, double *p, int *ktrue, int *nfuzz, int *ncluv, double *rdraw, int trace_lev); static void fygur(int kk, int nn, int ncluv[], double dss[], double s, int *nsend, int *nelem, int *negbr, double *syl, double *srank, double *avsyl, double *ttsyl, double *sylinf); void cl_fanny(int *nn, /* = number of objects */ int *jpp, /* = number of variables for clustering */ int *kk, /* = number of clusters */ double *x, double *dss, int *jdyss, double *valmd, int *jtmd, int *ndyst, int *nsend, int *nelem, int *negbr, double *syl, double *p, double *dp, double *pt, int *nfuzz, double *esp, double *ef, double *dvec, double *ttsyl, double *obj, /* input/output; see fuzzy() below */ int *ncluv, double *sylinf, double *r, double *tol, int *maxit) { int ktrue, trace_lev = (int) obj[1]; Rboolean all_stats = (obj[0] == 0.);/* TODO: consider *not* doing caddy() */ if (*jdyss != 1) { /* compute dissimilarities from data */ int jhalt = 0; dysta3(nn, jpp, x, dss, ndyst, jtmd, valmd, &jhalt); if (jhalt) { *jdyss = -1; return; } } fuzzy(*nn, *kk, p, dp, pt, dss, esp, ef, obj, *r, *tol, maxit, trace_lev); caddy(*nn, *kk, p, /* -> */ &ktrue, nfuzz, ncluv, pt, trace_lev); obj[0] = (double) ktrue; /* Compute "silhouette": */ if (all_stats && 2 <= ktrue && ktrue < *nn) { int i, nhalf = *nn * (*nn - 1) / 2; double s = 0.; /* s := max( dss[i,j] ) */ for(i = 0; i < nhalf; i++) if (s < dss[i]) s = dss[i]; fygur(ktrue, *nn, ncluv, dss, s, nsend, nelem, negbr, syl, dvec, pt, ttsyl, sylinf); } return; } /* cl_fanny */ void dysta3(int *nn, int *p, double *x, double *dys, int *ndyst, int *jtmd, double *valmd, int *jhalt) { int k, l, nlk, x_d = *nn; nlk = 0; for (l = 0; l < (*nn - 1); ++l) { for (k = l + 1; k < *nn; ++k, ++nlk) { double clk = 0.; int j, jj, npres = 0; for (j = 0, jj = 0; j < *p; j++, jj+=x_d) 
{ double d; if (jtmd[j] < 0) { if (x[l + jj] == valmd[j] || x[k + jj] == valmd[j]) continue; /* next j */ } ++npres; d = x[l + jj] - x[k + jj]; if (*ndyst != 2) /* 1 or 3 */ clk += d * d; else /* if (*ndyst == 2) */ clk += fabs(d); } if (npres == 0) { dys[nlk] = -1.; *jhalt = 1; } else { clk *= (*p) / (double) npres; dys[nlk] = (*ndyst == 1) ? sqrt(clk) : /*ndyst = 2 or 3 */ clk; } } } } /* dysta3 */ static void fuzzy(int nn, int k, double *p, double *dp, double *pt, double *dss, double *esp, double *ef, double *obj,/* of length 4; * in : (cluster_only, trace_lev, compute_p, 0) * out: (ktrue , cryt, PC ("dunn"), normalized_PC) */ double r, /* the exponent, > 1. -- was fixed to 2 originally */ double tol,/* the precision for the iterations */ int *nit, /* the maximal number of iterations -- originally fixed to 500 */ int trace_lev) { double dt, xx, ddd, crt, reen, cryt; int p_d = nn, dp_d = nn; int i, j, m, mi, it; Rboolean converged = FALSE, compute_p = (int)obj[2]; if(trace_lev) Rprintf("fanny()'s fuzzy(n = %d, k = %d):\n", nn, k); if(compute_p) { /* Compute initial fuzzy clustering, i.e. 
membership matrix p[,] */ int nd, ndk; double p0 = 0.1 / (k - 1); for (m = 0; m < nn; ++m) for (j = 0; j < k; ++j) p[m + j * p_d] = p0; ndk = nn / k; nd = ndk; j = 0; for (m = 0; m < nn; ++m) { int jj; p[m + j * p_d] = 0.9; if (m+1 >= nd) { ++j; if (j+1 == k) /* reset */ nd = nn; else nd += ndk; } for (jj = 0; jj < k; ++jj) p[m + jj * p_d] = pow(p[m + jj * p_d], r); } } else { /* p[,] already contains memberships */ for (m = 0; m < nn; ++m) for (j = 0; j < k; ++j) p[m + j * p_d] = pow(p[m + j * p_d], r); } /* initial criterion value */ cryt = 0.; for (j = 0; j < k; ++j) { esp[j] = 0.; ef[j] = 0.; for (m = 0; m < nn; ++m) { esp[j] += p[m + j * p_d]; for (i = 0; i < nn; ++i) { if (i != m) { mi = imin2(m,i); mi = mi * nn - (mi + 1) * (mi + 2) / 2 + imax2(m,i); dp[m + j * dp_d] += p[i + j * p_d] * dss[mi]; ef[j] += p[i + j * p_d] * p[m + j * p_d] * dss[mi]; } } } cryt += ef[j] / (esp[j] * 2.); } crt = cryt; if(trace_lev) { Rprintf("fuzzy(): initial obj = %g\n", cryt); if(trace_lev >= 2) { Rprintf(" ef[]= ("); for(j=0; j < k; j++) Rprintf(" %g%s", ef[j], ((j < k-1)? "," : ")\n")); Rprintf(" esp[]= ("); for(j=0; j < k; j++) Rprintf(" %g%s",esp[j], ((j < k-1)? "," : ")\n")); } } reen = 1. / (r - 1.); it = 0; while(++it <= *nit) { /* . . . . . iterations . . . . . . . . . . . . . */ for(m = 0; m < nn; m++) { /* the new membership coefficients of the objects are calculated, and the resulting value of the criterion is computed. */ dt = 0.; for (j = 0; j < k; ++j) { pt[j] = pow(esp[j] / (dp[m + j * dp_d] - ef[j] / (2 * esp[j])), reen); dt += pt[j]; } xx = 0.; for (j = 0; j < k; ++j) { pt[j] /= dt; if (pt[j] < 0.) xx += pt[j]; } /* now: sum_j (pt[j]) == 1; xx := sum_{pt[j] < 0} pt[j] */ for (j = 0; j < k; ++j) { double d_mj; pt[j] = (pt[j] > 0.) ? 
pow(pt[j] / (1 - xx), r) : 0.; d_mj = pt[j] - p[m + j * p_d]; esp[j] += d_mj; for (i = 0; i < nn; ++i) { if (i != m) { mi = imin2(m,i); mi = mi * nn - (mi + 1) * (mi + 2) / 2 + imax2(m,i); ddd = d_mj * dss[mi]; dp[i + j * dp_d] += ddd; ef[j] += p[i + j * p_d] * 2. * ddd; } } p[m + j * p_d] = pt[j]; } if(trace_lev >= 3) { Rprintf(" pt[m= %d, *]: ",m); for (j = 0; j < k; ++j) Rprintf(" %g%s", pt[j], ((j < k-1)? "," : "\n")); } } /* m == nn */ cryt = 0.; for (j = 0; j < k; ++j) cryt += ef[j] / (esp[j] * 2.); if(trace_lev >= 2) Rprintf(" m == n: obj = %#20.14g", cryt); /* Convergence check */ if((converged = (fabs(cryt - crt) <= tol * cryt))) break; if(trace_lev >= 2) Rprintf(" not converged: it = %d\n", it); crt = cryt; } /* while */ *nit = (converged)? it : -1; if(trace_lev) { Rprintf("%s%sonverged after %d iterations, obj = %#20.*g\n", trace_lev >=2 ? "\n" : "", (converged) ? "C" : "NOT c", it, (int)((trace_lev >= 2)? 20 : 7), cryt); } /* obj[0] = (double) it; << no longer; return it via *nit ! */ obj[1] = cryt; /* PC (partition coefficient), "non-fuzzyness index" of libert is computed * C = 1/n sum_{i,j} u_{i,j} ^ r fulfills * 1 >= C >= sum_j (1/k)^r = k * k^-r = k^(1-r) * ==> normalization (C - k^(1-r)) / (1 - k^(1-r)) = (k^(r-1) * C - 1) / (k^(r-1) - 1) */ for (j = 0, crt = 0.; j < k; ++j) crt += esp[j]; crt /= nn; obj[2] = crt; /* the PC */ xx = pow((double)k, r - 1.); obj[3] = (xx * crt - 1.) / (xx - 1.); /* Note however, that for r != 2, MM rather prefers to use * the "original definition" C = 1/n sum_{i,j} u_{i,j} ^ 2, and its normalization */ /* p[m,j] := (u_{m,j} ^ r) ^{1/r} == u_{m,j} : */ xx = 1. 
/ r; for (m = 0; m < nn; ++m) for (j = 0; j < k; ++j) p[m + j * p_d] = pow(p[m + j * p_d], xx); } /* fuzzy */ static void caddy(int nn, int k, double *p, int *ktrue, int *nfuzz, int *ncluv, double *rdraw, int trace_lev) { Rboolean stay; int i, m, ktry, nbest; double pbest; if(trace_lev) Rprintf("fanny()'s caddy(*, k = %d):\n", k); pbest = p[0]; nbest = 1; for (i = 1; i < k; ++i) { if (pbest < p[i * nn]) { pbest = p[i * nn]; nbest = i+1; } } nfuzz[0] = nbest; ncluv[0] = 1; *ktrue = 1; for (m = 1; m < nn; ++m) { pbest = p[m]; nbest = 1; for (i = 1; i < k; ++i) { if (pbest < p[m + i * nn]) { pbest = p[m + i * nn]; nbest = i+1; } } stay = FALSE; for (ktry = 0; ktry < *ktrue; ++ktry) { if (nfuzz[ktry] == nbest) { stay = TRUE; ncluv[m] = ktry+1; break; } } if (! stay) { nfuzz[*ktrue] = nbest; (*ktrue)++; ncluv[m] = *ktrue; } } if(trace_lev) Rprintf(" -> k_true (crisp) = %d", *ktrue); if (*ktrue < k) { int kwalk, kleft; if(trace_lev) Rprintf(" < k (= %d) !!\n", k); for (kwalk = *ktrue; kwalk < k; ++kwalk) { for (kleft = 1; kleft <= k; ++kleft) { stay = FALSE; for (ktry = 0; ktry < kwalk; ++ktry) { if (nfuzz[ktry] == kleft) { stay = TRUE; break; } } if (! 
stay) { nfuzz[kwalk] = kleft; break; } } } } else if(trace_lev) Rprintf("\n"); for (m = 0; m < nn; ++m) { for (i = 0; i < k; ++i) rdraw[i] = p[m + (nfuzz[i]-1) * nn]; for (i = 0; i < k; ++i) p[m + i * nn] = rdraw[i]; } return; } /* caddy */ /* ----------------------------------------------------------- Compute Silhouette Information : TODO cleanup: this is almost identical to dark() in ./pam.c -- difference : different dys[] / dss[] indexing, but that -- dss[] indexing change needs to be "synchronized" in all functions here */ static void fygur(int kk, int nn, int ncluv[], double dss[], double s, int *nsend, int *nelem, int *negbr, double *syl, double *srank, double *avsyl, double *ttsyl, double *sylinf) { int sylinf_d = nn; /* sylinf[nn, 4] */ int j, l, k, k_, nj, ntt, nsylr; double dysa, dysb; /* pointers to sylinf[] columns:*/ double *sylinf_2, *sylinf_3, *sylinf_4; sylinf_2 = sylinf + sylinf_d; sylinf_3 = sylinf_2 + sylinf_d; sylinf_4 = sylinf_3 + sylinf_d; /* Parameter adjustments */ --avsyl; --ncluv; --dss; nsylr = 0; *ttsyl = 0.; for (k = 1; k <= kk; ++k) { /* nelem[0:(ntt-1)] := indices (1-based) of obs. in cluster k : */ ntt = 0; for (j = 1; j <= nn; ++j) { if (ncluv[j] == k) { nelem[ntt] = j; ++ntt; } } for (j = 0; j < ntt; ++j) {/* (j+1)-th obs. 
in cluster k */ nj = nelem[j]; dysb = s * 1.1 + 1.; negbr[j] = -1; /* for all clusters k_ != k : */ for (k_ = 1; k_ <= kk; ++k_) if (k_ != k) { int nbb = 0; double db = 0.; for (l = 1; l <= nn; ++l) { if (ncluv[l] == k_) { ++nbb; if (l < nj) { db += dss[nn * (l - 1) + nj - l * (l + 1) / 2]; } else if (l > nj) { db += dss[nn * (nj - 1) + l - nj * (nj + 1) / 2]; } /* else dss(.)=0 ; nothing to add */ } } db /= nbb; /* now db(k_) := mean( d[j, l]; l in C_{k_} ) */ if (dysb > db) { dysb = db; negbr[j] = k_; } }/* negbr[j] := arg min_{k_} db(k_) */ if (ntt > 1) { dysa = 0.; for (l = 0; l < ntt; ++l) { int nl = nelem[l]; if (nj < nl) { dysa += dss[nn * (nj - 1) + nl - nj * (nj + 1) / 2]; } else if (nj > nl) { dysa += dss[nn * (nl - 1) + nj - nl * (nl + 1) / 2]; }/* else dss(.)=0 ; nothing to add */ } dysa /= ntt - 1; if (dysa > 0.) { if (dysb > 0.) { if (dysb > dysa) syl[j] = 1. - dysa / dysb; else if (dysb < dysa) syl[j] = dysb / dysa - 1.; else /* dysb == dysa: */ syl[j] = 0.; if (syl[j] < -1.) syl[j] = -1.; else if (syl[j] > 1.) syl[j] = 1.; } else { syl[j] = -1.; } } else /* dysa == 0 */ if (dysb > 0.) 
syl[j] = 1.; else syl[j] = 0.; } else { /* ntt == 1: */ syl[j] = 0.; } } /* for( j ) */ avsyl[k] = 0.; for (j = 0; j < ntt; ++j) { int lang = 0 /* -Wall */; double symax = -2.; for (l = 0; l < ntt; ++l) { if (symax < syl[l]) { symax = syl[l]; lang = l; } } nsend[j] = lang; srank[j] = symax; /* = syl[lang] */ avsyl[k] += srank[j]; syl[lang] = -3.; } *ttsyl += avsyl[k]; avsyl[k] /= (double) ntt; if (ntt < 2) { sylinf [nsylr] = (double) k; sylinf_2[nsylr] = (double) negbr[0]; sylinf_3[nsylr] = 0.; sylinf_4[nsylr] = (double) nelem[0]; ++nsylr; } else { for (j = 0; j < ntt; ++j) { nj = nsend[j]; sylinf [nsylr] = (double) k; sylinf_2[nsylr] = (double) negbr[nj]; sylinf_3[nsylr] = srank[j]; sylinf_4[nsylr] = (double) nelem[nj]; ++nsylr; } } } /* for (k) */ *ttsyl /= nn; } /* fygur */ cluster/src/cluster.h0000644000175100001440000001667212553131040014400 0ustar hornikusers/* Declare everything, Fortran & C -- so we can register them */ #include #include /* -> Rconfig.h, but also Boolean.h RS.h */ #ifdef ENABLE_NLS #include #define _(String) dgettext ("cluster", String) #else #define _(String) (String) #endif /* --------- ./clara.c ------------------*/ double randm(int *nrun); void cl_clara(int *n, /* = number of objects */ int *jpp,/* = number of variables */ int *kk, /* = number of clusters, 1 <= kk <= n-1 */ double *x, /* Input: the data x[n, jpp] _rowwise_ (transposed) * Output: the first `n' values are the `clustering' * (integers in 1,2,..,kk) */ int *nran, /* = #{random samples} drawn (= `samples' in R)*/ int *nsam, /* = #{objects} drawn from data set (`sampsize' in R) */ double *dys,/* [1:(1 + (nsam * (nsam - 1))/2)] * Output: to contain the distances */ int *mdata, /*= {0,1}; 1: min(x) is missing value (NA); 0: no NA */ double *valmd,/*[j]= missing value code (instead of NA) for x[,j]*/ int *jtmd, /* [j]= {-1,1}; -1: x[,j] has NA; 1: no NAs in x[,j] */ int *diss_kind,/* = {1,2}; 1 : euclidean; 2 : manhattan*/ int/*logical*/ *rng_R,/*= {0,1}; 0 : use clara's 
internal weak RNG; * 1 : use R's RNG (and seed) */ int/*logical*/ *pam_like,/* if (1), we do "swap()" as in pam();*/ // otherwise use the code as it was in clara() "forever" upto 2011-04 int *nrepr, int *nsel, int *nbest,/* x[nbest[j]] will be the j-th obs in the final sample */ int *nr, int *nrx, double *radus, double *ttd, double *ratt, double *ttbes, double *rdbes, double *rabes, int *mtt, double *obj, double *avsyl, double *ttsyl, double *sylinf, int *jstop, int *trace_lev, double *tmp, /* = double [ 3 * nsam ] */ int *itmp /* = integer[ 6 * nsam ] */ ); void dysta2(int nsam, int jpp, int *nsel, double *x, int n, double *dys, int diss_kind, int *jtmd, double *valmd, Rboolean has_NA, Rboolean *toomany_NA); void bswap2(int kk, int nsam, double s, const double dys[], Rboolean pam_like, int trace_lev, // result: double *sky, int *nrepr, double *dysma, double *dysmb, double *beter); void selec(int kk, int n, int jpp, int diss_kind, double *zb, int nsam, Rboolean has_NA, int *jtmd, double *valmd, int trace_lev, int *nrepr, int *nsel, double *dys, double *x, int *nr, Rboolean *nafs, double *ttd, double *radus, double *ratt, int *nrnew, int *nsnew, int *npnew, int *ns, int *np, int *new, double *ttnew, double *rdnew); void resul(int kk, int n, int jpp, int diss_kind, Rboolean has_NA, int *jtmd, double *valmd, double *x, int *nrx, int *mtt); void black(int kk, int jpp, int nsam, int *nbest, double *dys, double s, double *x, /* --> Output : */ double *avsyl, double *ttsyl, double *sylinf, int *ncluv, int *nsend, int *nelem, int *negbr, double *syl, double *srank); /* -------- ./dysta.f --- (was in pam.f) -------------------- */ int F77_NAME(dysta)(int *nn, int *jpp, double *x, double *dys, int *ndyst, int *jtmd, double *valmd, int *jhalt); /* --------- ./pam.c ------------------*/ void cl_pam(int *nn, int *jpp, int *kk, double *x, double *dys, int *jdyss, /* jdyss = 0 : compute distances from x * = 1 : distances provided in x */ double *valmd, int *jtmd, int *ndyst, int 
*nsend, int *nrepr, int *nelem, double *radus, double *damer, double *ttd, double *separ, double *ttsyl, double *obj, int *med, int *ncluv, double *clusinf, double *sylinf, int *nisol, int* optim); SEXP cl_Pam(SEXP k_, SEXP n_, SEXP do_diss_, /* == !diss; if true, compute distances from x (= x_or_diss); otherwise distances provided by x_or_diss */ SEXP x_or_diss,// this "is" if(do_diss) "x[]" (n x p) else "dys[]" SEXP all_stats_, // all_stats == !cluster.only SEXP medoids, // NULL or integer(k) subset {1:n} SEXP do_swap_, SEXP trace_lev_, SEXP keep_diss_, SEXP pam_once_, // the next 3 are only needed if(do_diss) SEXP val_md, SEXP j_md, // "md" := [m]issing [d]ata SEXP dist_kind); // = 1 ("euclidean") or 2 ("manhattan") void bswap(int kk, int nsam, int *nrepr, /* nrepr[]: here is boolean (0/1): 1 = "is representative object" */ Rboolean med_given, Rboolean do_swap, int trace_lev, double *dysma, double *dysmb, double *beter, const double dys[], double s, double *obj, int pamonce); void cstat(int kk, int nn, int *nsend, int *nrepr, Rboolean all_stats, double *radus, double *damer, double *ttd, double *separ, double *s, double *dys, int *ncluv, int *nelem, int *med, int *nisol); void dark(int kk, int nn, const int ncluv[], const double dys[], double s, int *nsend, int *nelem, int *negbr, double *syl, double *srank, double *avsyl, double *ttsyl, double *sylinf); /* --------- ./spannel.c ------------------*/ void cl_sweep(double *, int *, int *, int *, double *); void spannel(int *ncas, /* = number of objects */ int *ndep, /* = number of variables */ double *dat,/* [ncas, 0:ndep] */ double *dstopt, /* = squared distances [1:ncas] */ double *cov,/* matrix [0:ndep, 0:ndep] */ double *varsum, /* [1:ndep] */ double *varss, /* [1:ndep] */ double *prob, /* [1:ncas] */ double *work, /* [0:ndep] */ double *eps, int *maxit, /* = maximal # iterations (and returns #{iter.})*/ int *ierr); void sildist(double *d, /* distance : in matrix or dist format; i.e., of length n^2 or 
n*(n-1)/2; see 'ismat' */ int *n, /* number of Subjects (attr(d,'Size')) */ int *clustering,/* clustering vector, values from {1..k} */ int *k, /* number of clusters */ double *diC, /* diC */ int *counts, /* counts[k] := #{cluster k} */ double *si, /* (a_i - b_i) / max(ai,bi) */ int *neighbor, /* neighbor */ int *ismat); /* boolean : is 'd' a matrix or 'dist' ? */ void cl_fanny(int *nn, int *jpp, int *kk, double *x, double *dss, int *jdyss, double *valmd, int *jtmd, int *ndyst, int *nsend, int *nelem, int *negbr, double *syl, double *p, double *dp, double *pt, int *nfuzz, double *esp, double *ef, double *dvec, double *ttsyl, double *obj, int *ncluv, double *sylinf, double *r, double *tol, int *maxit); /* ================= Fortran things (remainder) ======================== */ /* -------- ./daisy.f ---------------------------------- */ int F77_NAME(cldaisy)(int *nn, int *jpp, double *x, double *valmd, int *jtmd, int *jdat, int *vtype, int *ndyst, int *mdata, double *disv); /* -------- ./fanny.c ---------------------------------- */ /* R-level: called only from ../tests/dysta-ex.R (now via .C()): */ void dysta3(int *nn, int *p, double *x, double *dys, int *ndyst, int *jtmd, double *valmd, int *jhalt); /* -------- ./mona.f ---------------------------------- */ int F77_NAME(clmona)(int *nn, int *pp, int *x, int *jerr, int *nban, int *ner, int *kwan, int *lava, int *jlack); /* -------- ./twins.c ---------------------------------- */ void R_bncoef(int *nn, double *ban, double *cf); double bncoef(int nn, double *ban); void twins(int *nn, int *jpp, double *x, double *dys, double *dys2, int *jdyss, double *valmd, int *jtmd, int *ndyst, int *jalg, int *method, int *kwan, int *ner, double *ban, double *coef, double *alpha, int *merge, int *trace_lev); cluster/src/clara.c0000644000175100001440000006332112553131040013765 0ustar hornikusers/* Clustering LARge Applications ~ ~~~ ~ Clustering program based upon the k-medoid approach, and suitable for data sets of at least 100 
objects. (for smaller data sets, please use program pam.) */ /* original Id: clara.f,v 1.10 2002/08/27 15:43:58 maechler translated by * f2c (version 20010821) and run through f2c-clean,v 1.10 2002/03/28 */ #include #include /* for diagnostics */ #include /* when R's RNG is used */ #include /* for interrupting */ #include "cluster.h" #include "ind_2.h" void cl_clara(int *n, /* = number of objects */ int *jpp,/* = number of variables */ int *kk, /* = number of clusters, 1 <= kk <= n-1 */ double *x, /* Input: the data x[n, jpp] _rowwise_ (transposed) * Output: the first `n' values are the `clustering' * (integers in 1,2,..,kk) */ int *nran, /* = #{random samples} drawn (= `samples' in R)*/ int *nsam, /* = #{objects} drawn from data set (`sampsize' in R) */ double *dys,/* [1:(1 + (nsam * (nsam - 1))/2)] * Output: to contain the distances */ int *mdata, /*= {0,1}; 1: min(x) is missing value (NA); 0: no NA */ double *valmd,/*[j]= missing value code (instead of NA) for x[,j]*/ int *jtmd, /* [j]= {-1,1}; -1: x[,j] has NA; 1: no NAs in x[,j] */ int *diss_kind,/*= {1,2}; 1 : euclidean; 2 : manhattan*/ int/*logical*/ *rng_R,/*= {0,1}; 0 : use clara's internal weak RNG; * 1 : use R's RNG (and seed) */ int/*logical*/ *pam_like,/* if (1), we do "swap()" as in pam();*/ // otherwise use the code as it was in clara() "forever" upto 2011-04 int *nrepr, /* logical (0/1): 1 = "is representative object" */ int *nsel, int *nbest,/* x[nbest[j],] : the j-th obs in the final sample */ int *nr, int *nrx,/* prov. 
and final "medoids" aka representatives */ double *radus, double *ttd, double *ratt, double *ttbes, double *rdbes, double *rabes, int *mtt, double *obj, double *avsyl, double *ttsyl, double *sylinf, int *jstop, int *trace_lev, double *tmp, /* = double [ 3 * nsam ] */ int *itmp /* = integer[ 6 * nsam ] */ ) { #define tmp1 tmp #define tmp2 &tmp[*nsam] #define ntmp1 itmp #define ntmp2 &itmp[*nsam] #define ntmp3 &itmp[nsamb] #define ntmp4 &itmp[nsamb+ *nsam] #define ntmp5 &itmp[2*nsamb] #define ntmp6 &itmp[2*nsamb+ *nsam] /* Local variables */ Rboolean nafs, kall, full_sample, lrg_sam, dyst_toomany_NA, has_NA = *mdata; int j, jk, jkk, js, jsm, jran, l, n_sam; int nsm, ntt, rand_k, nrun, n_dys, nsamb, nunfs; double rnn, sky, zb, s, sx = -1., zba = -1.;/* Wall */ *jstop = 0; rnn = (double) (*n); /* n_dys := size of distance array dys[] */ n_dys = *nsam * (*nsam - 1) / 2 + 1;/* >= 1 */ full_sample = (*n == *nsam);/* only one sub sample == full data */ nsamb = *nsam * 2; lrg_sam = (*n < nsamb);/* sample more than *n/2 */ if (lrg_sam)/* generate indices for the other, smaller half */ n_sam = *n - *nsam; else n_sam = *nsam; if(*trace_lev) Rprintf("C clara(): (nsam,nran,n) = (%d,%d,%d);%s\n", *nsam, *nran, *n, full_sample ? " 'full_sample',": (lrg_sam ? " 'large_sample',": "")); if(*rng_R && !full_sample) GetRNGstate(); else /* << initialize `random seed' of the very simple randm() below */ nrun = 0; #define NEW_rand_k_trace_print(_nr_) \ rand_k= 1+ (int)(rnn* ((*rng_R)? unif_rand(): randm(&nrun))); \ if (rand_k > *n) {/* should never happen */ \ warning(_("C level clara(): random k=%d > n **\n"), rand_k); \ rand_k = *n; \ } \ if(*trace_lev >= 4) { \ Rprintf("... {" #_nr_ "}"); \ if(*rng_R) Rprintf("R unif_rand()"); \ else Rprintf("nrun=%5d", nrun); \ Rprintf(" -> k{ran}=%d\n", rand_k); \ } /* __LOOP__ : random subsamples are drawn and partitioned into kk clusters */ kall = FALSE; /* kall becomes TRUE iff we've found a "valid sample", i.e. 
one for which all d(j,k) can be computed */ nunfs = 0; dyst_toomany_NA = FALSE; for (jran = 1; jran <= *nran; ++jran) { if(*trace_lev) Rprintf("C clara(): sample %d ", jran); if (!full_sample) {/* `real' case: sample size < n */ ntt = 0; if (kall && nunfs+1 != jran && !lrg_sam) { /* Have had (at least) one valid sample; use its representatives * nrx[] : nsel[] := sort(nrx[]) for the first j=1:k */ if(*trace_lev >= 2) Rprintf(" if (kall && nunfs...): \n"); for (jk = 0; jk < *kk; ++jk) nsel[jk] = nrx[jk]; for (jk = 0; jk < *kk-1; ++jk) { /* sort(nsel[0:(kk-1)] */ /* FIXME: nsel[] is 0-indexed, but *contains* 1-indices*/ nsm = nsel[jk]; jsm = jk; for (jkk = jk + 1; jkk < *kk; ++jkk) { if (nsm > nsel[jkk]) { nsm = nsel[jkk]; jsm = jkk; } } nsel[jsm] = nsel[jk]; nsel[jk] = nsm; } ntt = *kk; } else { /* no valid sample _OR_ lrg_sam */ if(*trace_lev >= 2) Rprintf(" finding 1st... new k{ran}:\n"); /* Loop finding random index `rand_k' not yet in nrx[0:(*kk-1)] : */ L180: NEW_rand_k_trace_print(180) if (kall) { for (jk = 0; jk < *kk; ++jk) if (rand_k == nrx[jk]) goto L180; } /* end Loop */ nsel[ntt] = rand_k; if (++ntt == n_sam) goto L295; } if(*trace_lev >= 2) { Rprintf(".. kall: %s, ", (kall) ? "T" : "FALSE"); if(*trace_lev == 2) { Rprintf("nsel[ntt=%d] = %d\n", ntt, nsel[ntt]); } else { /* trace_lev >= 3 */ Rprintf("\n... nrx [0:%d]= ",*kk-1); for (jk = 0; jk < *kk; jk++) Rprintf("%d ",nrx[jk]); Rprintf("\n... 
nsel[0:%d]= ",ntt-1); for (jk = 0; jk < ntt; jk++) Rprintf("%d ",nsel[jk]); Rprintf("\n"); } } do { /* Loop finding random index 'rand_k' in {1:n}, * not in nrx[0:(k-1)] nor nsel[1:ntt] : */ L210: NEW_rand_k_trace_print(210) if (kall && lrg_sam) { for (jk = 0; jk < *kk; ++jk) { if (rand_k == nrx[jk]) goto L210; } } /* insert rand_k into nsel[1:ntt] or after and increase ntt : */ for (int ka = 0; ka < ntt; ++ka) if (nsel[ka] >= rand_k) { if (nsel[ka] == rand_k) goto L210; else {// nsel[ka] > rand_k : for (int na = ntt; na > ka; --na) nsel[na] = nsel[na-1]; nsel[ka] = rand_k; /* continue _outer_ loop */ goto L290; } } // else: rand_k > nsel[ka] for all ka = 0:(ntt-1) : nsel[ntt] = rand_k; L290: ++ntt; } while (ntt < n_sam); L295: if(*trace_lev) Rprintf(" {295} [ntt=%d, nunfs=%d] ", ntt, nunfs); if (lrg_sam) { /* have indices for smaller _nonsampled_ half; revert this: */ for (j = 1, jk = 0, js = 0; j <= *n; j++) { if (jk < n_sam && nsel[jk] == j) ++jk; else nrepr[js++] = j; } for (j = 0; j < *nsam; ++j) nsel[j] = nrepr[j]; } if(*trace_lev >= 3) { Rprintf(".. nsel[1:%d]= ", *nsam); for (jk = 0; jk < *nsam; jk++) Rprintf("%d ",nsel[jk]); } if(*trace_lev) Rprintf(" -> dysta2()\n"); } else { /* full_sample : *n = *nsam -- one sample is enough ! */ for (j = 0; j < *nsam; ++j) nsel[j] = j+1;/* <- uses 1-indices for its *values*! */ } dysta2(*nsam, *jpp, nsel, x, *n, dys, *diss_kind, jtmd, valmd, has_NA, &dyst_toomany_NA); if(dyst_toomany_NA) { if(*trace_lev) Rprintf(" dysta2() gave dyst_toomany_NA --> new sample\n"); dyst_toomany_NA = FALSE; ++nunfs; continue;/* random sample*/ } s = 0.; for(l = 1; l < n_dys; l++) /* dys[0] is not used here */ if (s < dys[l]) s = dys[l]; if(*trace_lev >= 2) Rprintf(". 
clara(): s:= max dys[1..%d] = %g;", l-1,s); bswap2(*kk, *nsam, s, dys, *pam_like, *trace_lev, /* --> */ &sky, nrepr, /* dysma */tmp1, /*dysmb*/tmp2, /* beter[], only used here */&tmp[nsamb]); if(*trace_lev >= 2) Rprintf("end{bswap2}: sky = %g\n", sky); selec(*kk, *n, *jpp, *diss_kind, &zb, *nsam, has_NA, jtmd, valmd, *trace_lev, nrepr, nsel, dys, x, nr, &nafs, ttd, radus, ratt, ntmp1, ntmp2, ntmp3, ntmp4, ntmp5, ntmp6, tmp1, tmp2); if (nafs) { /* couldn't assign some observation (to a cluster) * because of too many NA s */ ++nunfs; if(*trace_lev >= 2) Rprintf(" selec() -> 'NAfs'"); } else if(!kall || zba > zb) { /* 1st proper sample or new best */ kall = TRUE; if(*trace_lev >= 2) Rprintf(" 1st proper or new best:"); zba = zb; for (jk = 0; jk < *kk; ++jk) { ttbes[jk] = ttd [jk]; rdbes[jk] = radus[jk]; rabes[jk] = ratt [jk]; nrx [jk] = nr [jk]; } for (js = 0; js < *nsam; ++js) nbest[js] = nsel[js]; sx = s; } if(*trace_lev >= 2) Rprintf(" obj= %g\n", zb/rnn); if(full_sample) break; /* out of resampling */ } /* --- end random sampling loop */ if(*rng_R && !full_sample) PutRNGstate(); if (nunfs >= *nran) { *jstop = 1; return; } /* else */ if (!kall) { *jstop = 2; return; } if(*trace_lev) { Rprintf("C clara(): best sample _found_ "); if(*trace_lev >= 2) { Rprintf("; nbest[1:%d] =\n c(", *nsam); for (js = 0; js < *nsam; ++js) { Rprintf("%d", nbest[js]); if(js+1 < *nsam) Rprintf(","); } Rprintf(")\n"); } Rprintf(" --> dysta2(nbest), resul(), end\n"); } /* for the best subsample, the objects of the entire data set are assigned to their clusters */ *obj = zba / rnn; dysta2(*nsam, *jpp, nbest, x, *n, dys, *diss_kind, jtmd, valmd, has_NA, &dyst_toomany_NA); if(dyst_toomany_NA) { error(_( "clara()'s C level dysta2(nsam=%d, p=%d, nbest=%d, n=%d) gave 'toomany_NA'"), *nsam, *jpp, nbest, *n ); } resul(*kk, *n, *jpp, *diss_kind, has_NA, jtmd, valmd, x, nrx, mtt); if (*kk > 1) black(*kk, *jpp, *nsam, nbest, dys, sx, x, /* compute --> */ avsyl, ttsyl, sylinf, ntmp1, ntmp2, ntmp3, 
ntmp4, /* syl[] */ tmp1, tmp2);
    return;
} /* End clara() ---------------------------------------------------*/

#undef tmp1
#undef tmp2
#undef ntmp1
#undef ntmp2
#undef ntmp3
#undef ntmp4
#undef ntmp5
#undef ntmp6

/* dysta2(): compute the dissimilarity matrix of the currently selected
 * sub-sample ---> dys[].
 *
 * Fills dys[1 .. nsam*(nsam-1)/2] (lower triangle; dys[0] := 0) with the
 * distances d(nsel[l], nsel[k]), l > k, taken from the full data x[n, jpp]
 * (stored columnwise); nsel[] holds 1-based row indices into x.
 *
 * diss_kind == 1 : Euclidean, otherwise Manhattan.
 * has_NA / jtmd / valmd : missing-value handling as in clara(): variable j
 *   is skipped for a pair when either value equals the NA code valmd[j],
 *   and the sum is rescaled by jpp/npres.  If a pair has *no* usable
 *   variable at all, dys[.] := -1 and *toomany_NA is set so the caller
 *   can reject this sample and draw a new one.
 */
void dysta2(int nsam, int jpp, int *nsel,
	    double *x, int n, double *dys, int diss_kind,
	    int *jtmd, double *valmd,
	    Rboolean has_NA, Rboolean *toomany_NA)
{
/* Compute Dissimilarities for the selected sub-sample ---> dys[,] */
    int nlk = 0;
    dys[0] = 0.;/* very first index; *is* used because ind_2(i,i) |-> 0 ! */
    for (int l = 1; l < nsam; ++l) {
	int lsel = nsel[l];
	if(lsel <= 0 || lsel > n) /* defensive: nsel[] values must lie in 1..n */
	    error(_("C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%d"),
		  "l", l, lsel, n);
	for (int k = 0; k < l; ++k) { /* compute d(nsel[l], nsel[k]) {if possible}*/
	    int ksel = nsel[k];
	    if(ksel <= 0 || ksel > n)
		error(_("C level dysta2(): nsel[%s= %d] = %d is outside 0..n, n=%d"),
		      "k", k, ksel, n);
	    ++nlk;
	    int npres = 0, /* npres := #{variables usable for this pair} */
		j, lj, kj;
	    double clk = 0.;
	    for (j = 0, lj = lsel-1, kj = ksel-1; j < jpp; ++j, lj += n, kj += n) {
		if (has_NA && jtmd[j] < 0) { /* x[,j] has some Missing (NA) */
		    /* in the following line (Fortran!), x[-2] ==> seg.fault
		       {BDR to R-core, Sat, 3 Aug 2002} */
		    if (x[lj] == valmd[j] || x[kj] == valmd[j]) {
			continue /* next j */;
		    }
		}
		++npres;
		if (diss_kind == 1)
		    clk += (x[lj] - x[kj]) * (x[lj] - x[kj]);
		else
		    clk += fabs(x[lj] - x[kj]);
	    }
	    if (npres == 0) {/* cannot compute d(.,.) because of too many NA */
		*toomany_NA = TRUE;
		dys[nlk] = -1.;
	    } else {
		/* rescale for the skipped variables; sqrt only for Euclidean */
		double d1 = clk * (jpp / (double) npres);
		dys[nlk] = (diss_kind == 1) ? sqrt(d1) : d1 ;
	    }
	} /* for( k ) */
    } /* for( l ) */
    return;
} /* End dysta2() -----------------------------------------------------------*/

/* randm(): portable pseudo random number in [0, 1) -- see comment below */
double randm(int *nrun)
{
    /* we programmed this generator ourselves because we wanted it
       to be machine independent. it should run on most computers
       because the largest int used is less than 2^30 . the period
       is 2^16=65536, which is good enough for our purposes.
*/ /* MM: improved the original speed-wise only: */ *nrun = (*nrun * 5761 + 999) & 0177777; /* Masking off all but the last 16 bits is equivalent to % 65536 */ return ((double) (*nrun) / 65536.); } /* randm() */ /* bswap2() : called once [per random sample] from clara() : */ void bswap2(int kk, int n, /* == nsam == 'sampsize', here in clara */ double s, const double dys[], Rboolean pam_like, int trace_lev, // result (though *only* nrepr[] is really used in caller:) double *sky, int *nrepr, double *dysma, double *dysmb, double *beter) { int i, j, ij, k,h, hbest = -1, nbest = -1;/* init for -Wall */ double dzsky; /* Parameter adjustments */ --nrepr; --beter; --dysma; --dysmb; if(trace_lev >= 2) { if(trace_lev == 2) Rprintf("\n bswap2():"); else Rprintf("\nclara()'s bswap2(*, s=%g): ", s); } s = s * 1.1 + 1.;/* value larger than all dissimilarities */ /* ====== first algorithm: BUILD. ====== */ for (i = 1; i <= n; ++i) { nrepr[i] = 0; dysma[i] = s; } for(k = 0; k < kk; k++) { int nmax = -1; /* -Wall */ double ammax = 0.; for (i = 1; i <= n; ++i) { if (nrepr[i] == 0) { beter[i] = 0.; for (j = 1; j <= n; ++j) { double cmd = dysma[j] - dys[ ind_2(i, j)]; if (cmd > 0.) beter[i] += cmd; } if (ammax <= beter[i]) { /* does < (instead of <= ) work too? -- NO! */ ammax = beter[i]; nmax = i; } } } nrepr[nmax] = 1;/* = .true. : found new representative */ if(trace_lev >= 2) { if(trace_lev == 2) Rprintf(" %d", nmax); else Rprintf(" new repr. %d\n", nmax); } /* update dysma[] : dysma[j] = D(j, nearest_representative) */ for (j = 1; j <= n; ++j) { ij = ind_2(nmax, j); if (dysma[j] > dys[ij]) dysma[j] = dys[ij]; } } // output of the above loop: nrepr[], dysma[], ... *sky = 0.; for (j = 1; j <= n; ++j) *sky += dysma[j]; if(trace_lev >= 2) /* >= 2 (?) 
*/ { Rprintf(" after build: medoids are"); for (i = 1; i <= n; ++i) if(nrepr[i] == 1) Rprintf(" %2d", i); if(trace_lev >= 3) { Rprintf("\n and min.dist dysma[1:n] are\n"); for (i = 1; i <= n; ++i) { Rprintf(" %6.3g", dysma[i]); if(i % 10 == 0) Rprintf("\n"); } if(n % 10 != 0) Rprintf("\n"); } else Rprintf("\n"); Rprintf(" --> sky = sum_j D_j= %g\n", *sky); } if (kk == 1) return; // asky = *sky / ((double) n); /* ====== second algorithm: SWAP. ====== */ /* Big LOOP : */ L60: for (j = 1; j <= n; ++j) { /* dysma[j] := D_j d(j, ) [KR p.102, 104] * dysmb[j] := E_j d(j, <2-nd cl.medi>) [p.103] */ dysma[j] = s; dysmb[j] = s; for (i = 1; i <= n; ++i) { if (nrepr[i]) { ij = ind_2(i, j); if (dysma[j] > dys[ij]) { dysmb[j] = dysma[j]; dysma[j] = dys[ij]; } else if (dysmb[j] > dys[ij]) { dysmb[j] = dys[ij]; } } } } dzsky = 1.; /* 1 is arbitrary > 0; only dzsky < 0 matters in the end */ for (h = 1; h <= n; ++h) if (!nrepr[h]) { for (i = 1; i <= n; ++i) if (nrepr[i]) { double dz = 0.; /* dz := T_{ih} := sum_j C_{jih} [p.104] : */ for (j = 1; j <= n; ++j) { int ij = ind_2(i, j), hj = ind_2(h, j); if (dys[ij] == dysma[j]) { double small; if(pam_like) small = dysmb[j] > dys[hj] ? dys[hj] : dysmb[j]; else // old clara code which differs from pam()'s // and seems a bit illogical: small = dysmb[j] > dys[ij] ? dys[hj] : dysmb[j]; dz += (- dysma[j] + small); } else if (dys[hj] < dysma[j]) dz += (- dysma[j] + dys[hj]); } if (dzsky > dz) { dzsky = dz; // dzsky := min_{i,h} T_{i,h} hbest = h; nbest = i; } } } /* once had some 64-bit compiler / data configuration that looped forever*/ R_CheckUserInterrupt(); if (dzsky < 0.) { /* found an improving swap */ if(trace_lev >= 3) Rprintf( " swp new %d <-> %d old; decreasing diss. by %g\n", hbest, nbest, dzsky); nrepr[hbest] = 1; nrepr[nbest] = 0; *sky += dzsky; goto L60; } if(trace_lev >= 2 && hbest != -1) // in my examples hbest == -1 and it does not print: Rprintf( " Last swap: new %d <-> %d old; decreasing diss. 
by %g\n", hbest, nbest, dzsky); } /* End of bswap2() -------------------------------------------------- */ /* selec() : called once [per random sample] from clara() */ void selec(int kk, int n, int jpp, int diss_kind, double *zb, int nsam, Rboolean has_NA, int *jtmd, double *valmd, int trace_lev, int *nrepr, int *nsel, double *dys, double *x, int *nr, Rboolean *nafs, /* := TRUE if a distance cannot be calculated */ double *ttd, double *radus, double *ratt, // [i]tmp* for clara(), i.e. not used later! int *nrnew, int *nsnew, int *npnew, int *ns, int *np, int *new, double *ttnew, double *rdnew) { /* Local variables */ int j, jk, jj, jp, jnew, ka, kb, jkabc = -1/* -Wall */; int newf, nrjk, npab, nstrt, na, nb, npa, npb, njk, nobs; double pp = (double) (jpp), tra; /* Parameter adjustments */ --nsel; --nrepr; --ratt; --radus; --ttd; --np; --nr; --ns; --rdnew; --ttnew; --npnew; --nrnew; --nsnew; --new; /* nafs := TRUE if a distance cannot be calculated (because of NA s)*/ *nafs = FALSE; /* identification of representative objects, and initializations */ jk = 0; for (j = 1; j <= nsam; ++j) { if (nrepr[j] != 0) { ++jk; nr [jk] = nsel[j]; ns [jk] = 0; ttd [jk] = 0.; radus[jk] = -1.; np [jk] = j; } } /* - assignment of the objects of the entire data set to a cluster, * - computation of some statistics, * - determination of the new ordering of the clusters */ *zb = 0.; newf = 0; for(jj = 1; jj <= n; jj++) { double dsum, dnull = -9./* -Wall */; if (!has_NA) { for (jk = 1; jk <= kk; ++jk) { dsum = 0.; nrjk = nr[jk]; if (nrjk != jj) { for (jp = 0; jp < jpp; ++jp) { na = (nrjk - 1) + jp * n; nb = (jj - 1) + jp * n; tra = fabs(x[na] - x[nb]); if (diss_kind == 1) tra *= tra; dsum += tra; } if (jk != 1 && dsum >= dnull) continue /* next jk */; } dnull = dsum; jkabc = jk; } } else { /* _has_ missing data */ Rboolean pres = FALSE; for (jk = 1; jk <= kk; ++jk) { dsum = 0.; nrjk = nr[jk]; if (nrjk != jj) { nobs = 0; for (jp = 0; jp < jpp; ++jp) { na = (nrjk - 1) + jp * n; nb = (jj - 1) 
+ jp * n; if (jtmd[jp] < 0) { if (x[na] == valmd[jp] || x[nb] == valmd[jp]) continue /* next jp */; } nobs++; tra = fabs(x[na] - x[nb]); if (diss_kind == 1) tra *= tra; dsum += tra; } if (nobs == 0) /* all pairs partially missing */ continue /* next jk */; dsum *= (nobs / pp); /* MM: ^^^^^^^^^ fishy; rather * (pp/nobs) as in dysta2*/ } if (!pres) pres = TRUE; else if (dnull <= dsum) continue /* next jk */; /* here : pres was FALSE {i.e. 1st time} or * dnull > dsum {i.e. new best} */ dnull = dsum; jkabc = jk; }/* for(jk ..) */ if (!pres) { /* found nothing */ *nafs = TRUE; return; } } /* else: has_NA */ if (diss_kind == 1) dnull = sqrt(dnull); *zb += dnull; ttd[jkabc] += dnull; if (radus[jkabc] < dnull) radus[jkabc] = dnull; ++ns[jkabc]; if (newf < kk) { if (newf != 0) { for (jnew = 1; jnew <= newf; ++jnew) { if (jkabc == new[jnew]) goto L90;/* next jj */ } } ++newf; new[newf] = jkabc; } L90: ; } /* for( jj = 1..n ) */ /* a permutation is carried out on vectors nr,ns,np,ttd,radus using the information in vector new. */ for (jk = 1; jk <= kk; ++jk) { njk = new[jk]; nrnew[jk] = nr[njk]; nsnew[jk] = ns[njk]; npnew[jk] = np[njk]; ttnew[jk] = ttd[njk]; rdnew[jk] = radus[njk]; } for (jk = 1; jk <= kk; ++jk) { nr[jk] = nrnew[jk]; ns[jk] = nsnew[jk]; np[jk] = npnew[jk]; ttd[jk] = ttnew[jk]; radus[jk] = rdnew[jk]; } for (j = 1; j <= kk; ++j) { double rns = (double) ns[j]; ttd[j] /= rns; } if (kk > 1) { /* computation of ratt[ka] := minimal distance of medoid ka to any other medoid for comparison with the radius of cluster ka. */ for (ka = 1; ka <= kk; ++ka) { nstrt = 0; npa = np[ka]; for (kb = 1; kb <= kk; ++kb) { if (kb == ka) continue /* next kb */; npb = np[kb]; npab = ind_2(npa, npb); if (nstrt == 0) nstrt = 1; else if (dys[npab] >= ratt[ka]) continue /* next kb */; ratt[ka] = dys[npab]; if (ratt[ka] == 0.) 
ratt[ka] = -1.;
	    }
	    if (ratt[ka] > -0.5)
		ratt[ka] = radus[ka] / ratt[ka];
	}
    }
    return;
} /* End selec() -----------------------------------------------------------*/

/* resul(): assign *every* object of the full data set to its closest medoid.
 *
 * Given the final medoid indices nrx[0..kk-1] (1-based row numbers of
 * x[n, jpp]), overwrite x[0..n-1] (i.e. the first column of x) with the
 * 1-based cluster number of each object, and fill mtt[0..kk-1] with the
 * resulting cluster sizes.  Distances are Euclidean (diss_kind == 1) or
 * Manhattan, with the same missing-value treatment (jtmd[], valmd[]) as
 * dysta2().
 */
void resul(int kk, int n, int jpp, int diss_kind, Rboolean has_NA,
	   int *jtmd, double *valmd, double *x, int *nrx, int *mtt)
{
    /* Local variables */
    int j, jk, jj, ka, na, nb, njnb, nrjk, nobs, jksky = -1/* Wall */;
    double pp = (double) (jpp), dsum, tra, dnull = -9./* Wall */;

    /* clustering vector is incorporated into x, and ``printed''. */
    for(jj = 0; jj < n; jj++) {
	for (jk = 0; jk < kk; ++jk) {
	    if (nrx[jk] == jj + 1)/* 1-indexing */
		goto L220; /* continue next jj (i.e., outer loop) */
	}
	njnb = jj;

	if (!has_NA) { /* no NAs: plain distance of object jj to each medoid */
	    for (jk = 0; jk < kk; ++jk) {
		dsum = 0.;
		nrjk = (nrx[jk] - 1);
		for (j = 0; j < jpp; ++j) {
		    tra = fabs(x[nrjk + j * n] - x[njnb + j * n]);
		    if (diss_kind == 1)
			tra *= tra;
		    dsum += tra;
		}
		if (diss_kind == 1)
		    dsum = sqrt(dsum);
		if (jk == 0 || dnull > dsum) { /* keep closest medoid so far */
		    dnull = dsum;
		    jksky = jk;
		}
	    }
	}
	else { /* _has_ missing data */
	    for (jk = 0; jk < kk; ++jk) {
		dsum = 0.;
		nrjk = (nrx[jk] - 1);
		nobs = 0; /* #{variables present for this (object, medoid) pair} */
		for (j = 0; j < jpp; ++j) {
		    na = nrjk + j * n;
		    nb = njnb + j * n;
		    if (jtmd[j] < 0) {
			if (x[na] == valmd[j] || x[nb] == valmd[j])
			    continue /* next j */;
		    }
		    nobs++;
		    tra = fabs(x[na] - x[nb]);
		    if (diss_kind == 1)
			tra *= tra;
		    dsum += tra;
		}
		if (diss_kind == 1)
		    dsum = sqrt(dsum);
		dsum *= (nobs / pp);
		/* MM: ^^^^^^^^ fishy; rather * (pp/nobs) as in dysta2 --
		 *     deliberately kept for backward compatibility */
		if (jk == 0 || dnull > dsum) {
		    dnull = dsum;
		    jksky = jk;
		}
	    }
	}
	x[njnb] = (double) jksky + 1;/* 1-indexing */
    L220:
	;
    } /* for(jj = 0; jj < n ..)*/

    /* the medoids themselves get their own cluster number : */
    for (jk = 0; jk < kk; ++jk)
	x[nrx[jk] - 1] = (double) jk + 1;/* 1-indexing */

    /* mtt[k] := size(k-th cluster) : */
    for (ka = 0; ka < kk; ++ka) {
	mtt[ka] = 0;
	for(j = 0; j < n; j++) {
	    if (((int) x[j]) == ka + 1)/* 1-indexing */
		++mtt[ka];
	}
    }
    return;
} /* end resul() -----------------------------------------------------------*/

// called 'dark()' in ./pam.c
/* black(): silhouette computation for the best sub-sample */
void black(int kk, int jpp, int nsam, int *nbest, double *dys,
double s, double *x, /* --> Output : */ double *avsyl, double *ttsyl, double *sylinf, /* but the following output vectors are never used by clara() : */ int *ncluv, int *nsend, int *nelem, int *negbr, double *syl, double *srank) { /* Silhouettes computation and "drawing" --> syl[] and sylinf[] */ /* System generated locals */ int sylinf_dim1, sylinf_offset; /* Local variables */ double att, btt, db, dysa, dysb, symax; int lang = -1/* -Wall */; int j, l, lplac, nj, nl, nbb, ncase, nclu, numcl, nsylr, ntt; /* Parameter adjustments */ --avsyl; --srank; --syl; --negbr; --nelem; --nsend; --ncluv; --nbest; sylinf_dim1 = nsam; sylinf_offset = 1 + sylinf_dim1 * 1; sylinf -= sylinf_offset; /* construction of clustering vector (ncluv) of selected sample (nbest). */ /* Function Body */ for (l = 1; l <= nsam; ++l) { ncase = nbest[l]; ncluv[l] = (int) x[ncase - 1]; } /* drawing of the silhouettes */ nsylr = 0; *ttsyl = 0.; for (numcl = 1; numcl <= kk; ++numcl) { ntt = 0; for (j = 1; j <= nsam; ++j) { if (ncluv[j] == numcl) { ++ntt; nelem[ntt] = j; } } for (j = 1; j <= ntt; ++j) { nj = nelem[j]; dysb = s * 1.1 + 1.; negbr[j] = -1; for (nclu = 1; nclu <= kk; ++nclu) { if (nclu != numcl) { nbb = 0; db = 0.; for (l = 1; l <= nsam; ++l) { if (ncluv[l] == nclu) { ++nbb; db += dys[ind_2(nj, l)]; } } btt = (double) nbb; db /= btt; if (db < dysb) { dysb = db; negbr[j] = nclu; } } } if (ntt == 1) { syl[j] = 0.; continue /* j */; } dysa = 0.; for (l = 1; l <= ntt; ++l) { nl = nelem[l]; dysa += dys[ind_2(nj, nl)]; } att = (double) (ntt - 1); dysa /= att; if (dysa <= 0.) { if (dysb > 0.) syl[j] = 1.; else syl[j] = 0.; continue /* j */; } if (dysb > 0.) { if (dysb > dysa) syl[j] = 1. - dysa / dysb; else if (dysb < dysa) syl[j] = dysb / dysa - 1.; else /* (dysb == dysa) */ syl[j] = 0.; if (syl[j] < -1.) syl[j] = -1.; else if (syl[j] > 1.) syl[j] = 1.; } else { syl[j] = -1.; } } /* for(j ..) 
*/ avsyl[numcl] = 0.; for (j = 1; j <= ntt; ++j) { symax = -2.; for (l = 1; l <= ntt; ++l) { if (syl[l] > symax) { symax = syl[l]; lang = l; } } nsend[j] = lang; srank[j] = syl[lang]; avsyl[numcl] += srank[j]; syl[lang] = -3.; } *ttsyl += avsyl[numcl]; avsyl[numcl] /= ntt; if (ntt >= 2) { for (l = 1; l <= ntt; ++l) { lplac = nsend[l]; ncase = nelem[lplac]; ++nsylr; sylinf[nsylr + sylinf_dim1] = (double) numcl; sylinf[nsylr + (sylinf_dim1 << 1)] = (double) negbr[lplac]; sylinf[nsylr + sylinf_dim1 * 3] = srank[l]; sylinf[nsylr + (sylinf_dim1 << 2)] = (double) nbest[ncase]; } } else { ncase = nelem[1]; ++nsylr; sylinf[nsylr + sylinf_dim1] = (double) numcl; sylinf[nsylr + (sylinf_dim1 << 1)] = (double) negbr[1]; sylinf[nsylr + sylinf_dim1 * 3] = 0.; sylinf[nsylr + (sylinf_dim1 << 2)] = (double) nbest[ncase]; } } *ttsyl /= (double) (nsam); return; } /* black */ cluster/src/twins.c0000644000175100001440000003346312553131040014053 0ustar hornikusers/* Produced by * $Id: f2c-clean,v 1.10 2002/03/28 16:37:27 maechler Exp $ * * twins.f -- translated by f2c (version 20031025). */ #include #include #include /* for diagnostics */ #include /* for interrupting */ #include "cluster.h" #include "ind_2.h" // the auxiliary routines static void agnes(int nn, int *kwan, int *ner, double *ban, double dys[], int method, double *alpha, int *merge, int trace_lev); static void splyt(int nn, int *kwan, int *ner, double *ban, double dys[], int *merge, int trace_lev); static double min_dis(double dys[], int ka, int kb, int ner[]); /* This program performs agglomerative nesting (AGNES) using the */ /* group average method (_or others_) of Sokal and Michener (1958), */ /* as well as divisive analysis (DIANA) using the method of */ /* Mcnaughton-Smith, Williams, Dale, and Mockett (1964). 
*/

/* Extended by Martin Maechler to allow the (flexible) */
/* Lance-Williams clustering method (with parameters alpha[1:4]) */

/* twins(): common C entry point (via .C) for both AGNES and DIANA:
 *  - unless distances were supplied (*jdyss mod 10 == 1), compute them
 *    from x via the Fortran routine dysta(); on failure (jhalt != 0) set
 *    *jdyss := -1 and return early,
 *  - optionally copy the distances to dys2 for R (*jdyss >= 10),
 *  - run the agglomerative (agnes, *jalg != 2) or divisive (splyt)
 *    algorithm,
 *  - and finally compute the agglomerative/divisive coefficient *coef
 *    from the banner heights ban[].
 */
void twins(int *nn, // = maximal number of objects
	   int *jpp,// = maximal number of variables used in the analysis
	   double *x, double *dys,
	   double *dys2,// dys2(.) can have length 1, if(!keep.diss)
	   int *jdyss, /* jdyss (in/out): initially, jdyss mod 10 = 1 : <==> diss = TRUE
			* jdyss < 10 : don't save dissimilarities (C.keep.diss = FALSE) */
	   double *valmd, int *jtmd,
	   int *ndyst, int *jalg,
	   int *method, // for agnes() only
	   int *kwan,
	   int *ner, // = order [] (in R)
	   double *ban, // = height[]
	   double *coef, double *alpha, int *merge, int *trace_lev)
{
    if (*jdyss % 10 == 1) { /* distances were supplied by the caller */
	*jpp = 1;
    } else { // compute distances
	int jhalt = 0;
	F77_CALL(dysta)(nn, jpp, x, dys, ndyst, jtmd, valmd, &jhalt);
	/* ------ in ./dysta.f */
	if (jhalt != 0) {
	    *jdyss = -1;
	    return;
	}
    }
    if (*jdyss >= 10) { /* save distances for S */
	Memcpy(dys2, dys, (*nn * (*nn - 1) / 2 + 1));
    }
    if (*jalg != 2) { // AGNES
	agnes(*nn, kwan, ner, ban, dys, *method, alpha, merge, trace_lev[0]);
    } else { // DIANA
	splyt(*nn, kwan, ner, ban, dys, merge, trace_lev[0]);
    }
    // Compute agglomerative/divisive coefficient from banner:
    *coef = bncoef(*nn, ban);
    return;
} /* twins */

// merge[i,j] i=0..n_1, j = 1,2. --- for agnes() and splyt() [= diana] ---
#define Merge(i,j) merge[(j==1) ?
(i) : (i+n_1)] /* ----------------------------------------------------------- */ /* AGNES agglomeration */ static void agnes(int nn, int *kwan, int *ner, double *ban, double dys[], int method, double *alpha, int *merge, int trace_lev) { /* VARs */ int n_1 = nn - 1, _d, j, k, la = -1, lb = -1; // -Wall] Rboolean has_a3 = FALSE, has_a4 = FALSE,// is alpha[3] or [4] != 0 -- for Lance-Williams flex_meth = (method == 6 || method == 7); // 6: "flexible": "Flexible Strategy" (K+R p.236 f) extended to 'Lance-Williams' // 7: "gaverage" aka Flexible UPGMA (Belbin et al., 1992) /* Parameter adjustments */ --ban; --ner; --kwan; --alpha; if(trace_lev) { _d = (nn >= 100) ? 3 : (nn >= 10) ? 2 : 1; Rprintf("C agnes(n=%*d, method = %d, ..): ", _d,nn, method); } else _d = -1;// -Wall if(flex_meth) { has_a3 = (alpha[3] != 0.); has_a4 = (alpha[4] != 0.); if(trace_lev) { if(has_a4) Rprintf("|par| = 4, alpha[1:4] = (%g,%g,%g,%g); ", alpha[1],alpha[2],alpha[3],alpha[4]); else if(has_a3) Rprintf("|par| = 3, alpha[1:3] = (%g,%g,%g); ", alpha[1],alpha[2],alpha[3]); } } // Starting with nn clusters, kwan[j] := #{obj} in cluster j for (j = 1; j <= nn; ++j) { kwan[j] = 1; ner[j] = j; } // ---------------------------------------------------------------------------- /* find closest clusters */ if(trace_lev) Rprintf("%d merging steps\n", n_1); for (int nmerge = 0; nmerge < n_1; ++nmerge) { // j := min_j { kwan[j] > 0} = first non-empty cluster (j >= 2) j = 1; do { j++; } while(kwan[j] == 0); if(trace_lev >= 2) Rprintf(" nmerge=%*d, j=%*d, ", _d,nmerge, _d,j); double d_min = dys[ind_2(1, j)] * 1.1f + 1.; for (k = 1; k <= n_1; ++k) if (kwan[k] > 0) { for (j = k + 1; j <= nn; ++j) if (kwan[j] > 0) { int k_j = ind_2(k, j); if (d_min >= dys[k_j]) { // Note: also when "==" ! 
d_min = dys[k_j]; la = k; lb = j; } } } // --> closest clusters {la < lb} are at distance d_min if(trace_lev >= 2) Rprintf("d_min = D(%*d,%*d) = %#g; ", _d,la, _d,lb, d_min); /* merge-structure for plotting tree in S */ int l1 = -la, l2 = -lb; for (j = 0; j < nmerge; ++j) { if (Merge(j, 1) == l1 || Merge(j, 2) == l1) l1 = j+1; if (Merge(j, 1) == l2 || Merge(j, 2) == l2) l2 = j+1; } Merge(nmerge, 1) = l1; Merge(nmerge, 2) = l2; if(trace_lev >= 3) Rprintf(" -> (%*d,%*d); ", _d,l1, _d,l2); if(flex_meth && l1 == l2) { // can happen with non-sensical (alpha_1,alpha_2,beta, ...) error(_("agnes(method=%d, par.method=*) lead to invalid merge; step %d, D(.,.)=%g"), method, nmerge+1, d_min); } /* determine lfyrs and llast */ int llast = -1, lfyrs = -1; // -Wall for (k = 1; k <= nn; ++k) { if (ner[k] == la) lfyrs = k; if (ner[k] == lb) llast = k; } ban[llast] = d_min; if(trace_lev >= 2) Rprintf("last=%*d;", _d,llast); /* if the two clusters are next to each other, ner must not be changed */ int lnext = lfyrs + kwan[la]; if (lnext != llast) { /* updating ner and ban */ if(trace_lev >= 2) Rprintf(" upd(n,b);"); int lput = lfyrs + kwan[la], lenda = llast + kwan[lb] - 2; for (k = 1; k <= llast - lput; ++k) { int lka = ner[lput]; double akb = ban[lput]; for (j = lput; j <= lenda; ++j) { ner[j] = ner[j + 1]; ban[j] = ban[j + 1]; } ner[lenda+1] = lka; ban[lenda+1] = akb; } } if(trace_lev >= 3) Rprintf("\n"); /* We will merge A & B into A_{new} */ // Calculate new dissimilarities d(q, A_{new}) for (int lq = 1; lq <= nn; ++lq) { // for each other cluster 'q' if (lq == la || lq == lb || kwan[lq] == 0) continue; int naq = ind_2(la, lq); int nbq = ind_2(lb, lq); if(trace_lev >= 3) Rprintf(" old D(A, j), D(B, j), j=%*d = (%g,%g); ", _d,lq, dys[naq], dys[nbq]); switch(method) { case 1: { // 1: unweighted pair-]group average method, UPGMA double ta = (double) kwan[la], tb = (double) kwan[lb], fa = ta / (ta + tb), fb = tb / (ta + tb); dys[naq] = fa * dys[naq] + fb * dys[nbq]; break; } case 
2: /* 2: single linkage */ dys[naq] = fmin2(dys[naq], dys[nbq]); break; case 3: /* 3: complete linkage */ dys[naq] = fmax2(dys[naq], dys[nbq]); break; case 4: { // 4: ward's method double ta = (double) kwan[la], tb = (double) kwan[lb], tq = (double) kwan[lq], fa = (ta + tq) / (ta + tb + tq), fb = (tb + tq) / (ta + tb + tq), fc = -tq / (ta + tb + tq); int nab = ind_2(la, lb); dys[naq] = sqrt(fa * dys[naq] * dys[naq] + fb * dys[nbq] * dys[nbq] + fc * dys[nab] * dys[nab]); break; } case 5: /* 5: weighted average linkage */ dys[naq] = (dys[naq] + dys[nbq]) / 2.; break; case 6: { // 6: "Flexible Strategy" (K+R p.236 f) extended to 'Lance-Williams' double dNew = alpha[1] * dys[naq] + alpha[2] * dys[nbq]; if(has_a3) dNew += alpha[3] * dys[ind_2(la, lb)]; if(has_a4) dNew += alpha[4] * fabs(dys[naq] - dys[nbq]); dys[naq] = dNew; /* Lance-Williams would allow alpha(1:2) to *depend* on |cluster| * could also include the extensions of Jambu(1978) -- * See Gordon A.D. (1999) "Classification" (2nd ed.) p.78 ff */ break; } case 7: {/* 7: generalized "average" = Flexible UPGMA (Belbin et al., 1992) * Applies the flexible Lance-Williams formula to the UPGMA, aka * "average" case 1 above, i.e., alpha_{1,2} depend on cluster sizes: */ double ta = (double) kwan[la], tb = (double) kwan[lb], fa = alpha[1] * ta / (ta + tb), fb = alpha[2] * tb / (ta + tb), dNew = fa * dys[naq] + fb * dys[nbq]; if(has_a3) dNew += alpha[3] * dys[ind_2(la, lb)]; if(has_a4) dNew += alpha[4] * fabs(dys[naq] - dys[nbq]); dys[naq] = dNew; break; } default: error(_("invalid method (code %d)"), method); } if(trace_lev >= 3) Rprintf(" new D(A', %*d) = %g\n", _d,lq, dys[naq]); } // for (lq ..) kwan[la] += kwan[lb]; kwan[lb] = 0; if(trace_lev >= 2) Rprintf("%s size(A_new)= %d\n", (trace_lev >= 3)? " --> " : "", kwan[la]); }// for(nmerge ..) 
return; } /* agnes */ /* ----------------------------------------------------------- */ /* cf = ac := "Agglomerative Coefficient" from AGNES banner */ /* or cf = dc := "Divisive Coefficient" from DIANA banner */ void R_bncoef(int *nn, double *ban, double *cf) { *cf = bncoef(*nn, ban); } double bncoef(int n, double *ban) { int k, n_1 = n-1; double sup = 0.;// sup := max_k ban[k] for(k = 1; k < n; ++k) if (sup < ban[k]) sup = ban[k]; double cf = 0.; for (k = 0; k < n; ) { int kearl = (k > 0) ? k : 1, kafte = (++k < n) ? k : n_1; double syze = fmin2(ban[kearl], ban[kafte]); cf += (1. - syze / sup); } return cf / n; } /* bncoef */ /* ----------------------------------------------------------- */ /* DIANA "splitting" */ static void splyt(int nn, int *kwan, int *ner, double *ban, double dys[], int *merge, int trace_lev) { /* Local variables */ int j, ja, jb, k, l; int jma, jmb, lmm, llq, lmz, lxx, lxy, lmma, lmmb, lner, nclu; int lchan, nhalf, n_1 = nn - 1, splyn; /* Parameter adjustments */ --ban; --ner; --kwan; /* initialization */ nclu = 1; nhalf = nn * n_1 / 2 + 1; for (l = 1; l <= nn; ++l) { kwan[l] = 0; ban[l] = 0.; ner[l] = l; } kwan[1] = nn; ja = 1; /* cs := diameter of data set */ double cs = 0.; for(k = 0; k < nhalf; ++k) { if (cs < dys[k]) cs = dys[k]; } if(trace_lev) Rprintf("C diana(): ndist= %d, diameter = %g\n", nhalf, cs); /* prepare for splitting */ //____________ Big Loop _________________________________________________ L30: jb = ja + kwan[ja] - 1; jma = jb; if (kwan[ja] == 2) { // special case of a pair of objects kwan[ja] = 1; kwan[jb] = 1; ban [jb] = dys[ind_2(ner[ja], ner[jb])]; } else { /* finding first object to be shifted */ double bygsd = -1.; int lndsd = -1; for (l = ja; l <= jb; ++l) { lner = ner[l]; double sd = 0.; for (j = ja; j <= jb; ++j) sd += dys[ind_2(lner, ner[j])]; if (bygsd < sd) { bygsd = sd; lndsd = l; } } /* shifting the first object */ --kwan[ja]; kwan[jb] = 1; if (jb != lndsd) { lchan = ner[lndsd]; lmm = jb - 1; for (lmma = 
lndsd; lmma <= lmm; ++lmma) { lmmb = lmma + 1; ner[lmma] = ner[lmmb]; } ner[jb] = lchan; } splyn = 0; jma = jb - 1; /* finding the next object to be shifted */ do { splyn++; int rest = (jma - ja), jaway = -1; double bdyff = -1.; for (l = ja; l <= jma; ++l) { lner = ner[l]; double da = 0., db = 0.; for (j = ja; j <= jma; ++j) da += dys[ind_2(lner, ner[j])]; da /= rest; for (j = jma + 1; j <= jb; ++j) db += dys[ind_2(lner, ner[j])]; db /= splyn; double dyff = da - db; if (bdyff < dyff) { bdyff = dyff; jaway = l; } } /* end for(l ..) */ jmb = jma + 1; /* shifting the next object when necessary */ if (bdyff <= 0.) break; // out of "object shifting" while(.) loop if (jma != jaway) { lchan = ner[jaway]; lmz = jma - 1; for (lxx = jaway; lxx <= lmz; ++lxx) ner[lxx] = ner[lxx + 1]; ner[jma] = lchan; } for (lxx = jmb; lxx <= jb; ++lxx) { lxy = lxx - 1; if (ner[lxy] < ner[lxx]) break; lchan = ner[lxy]; ner[lxy] = ner[lxx]; ner[lxx] = lchan; } --kwan[ja]; kwan[jma] = kwan[jmb] + 1; kwan[jmb] = 0; --jma; jmb = jma + 1; } while (jma != ja); // 200: switch the two parts when necessary if (ner[ja] >= ner[jmb]) { int lxxa = ja; for (int lgrb = jmb; lgrb <= jb; ++lgrb) { ++lxxa; lchan = ner[lgrb]; int lxg = -1; for (lxy = lxxa; lxy <= lgrb; ++lxy) { int lxf = lgrb - lxy + lxxa; lxg = lxf - 1; ner[lxf] = ner[lxg]; } ner[lxg] = lchan; } llq = kwan[jmb]; kwan[jmb] = 0; jma = ja + jb - jma - 1; jmb = jma + 1; kwan[jmb] = kwan[ja]; kwan[ja] = llq; } /* 300 : compute level for banner */ if (nclu == 1) { ban[jmb] = cs; } else { ban[jmb] = min_dis(dys, ja, jb, &ner[1]); } } if (++nclu < nn) { /* continue splitting until all objects are separated */ if (jb != nn) { L420: ja += kwan[ja]; if (ja <= nn) { if (kwan[ja] <= 1) goto L420; else goto L30; } } ja = 1; if (kwan[ja] == 1) goto L420; else goto L30; } //____________ End Big Loop _________________________________________________ /* 500 : merge-structure for plotting tree in S */ for (int nmerge = 0; nmerge < n_1; ++nmerge) { int nj = -1, 
l1, l2; double dmin = cs; for (j = 2; j <= nn; ++j) { if (kwan[j] >= 0 && dmin >= ban[j]) { dmin = ban[j]; nj = j; } } kwan[nj] = -1; l1 = -ner[nj - 1]; l2 = -ner[nj]; for (j = 0; j < nmerge; ++j) { if (Merge(j, 1) == l1 || Merge(j, 2) == l1) l1 = j+1; if (Merge(j, 1) == l2 || Merge(j, 2) == l2) l2 = j+1; } Merge(nmerge, 1) = l1; Merge(nmerge, 2) = l2; } return; } /* splyt */ /* ----------------------------------------------------------- */ /* used in splyt() above */ static double min_dis(double dys[], int ka, int kb, int ner[]) { double dm = 0.; for(int k = ka -1; k < kb -1; ++k) { int ner_k = ner[k]; for (int j = k+1; j < kb; ++j) { int k_j = ind_2(ner_k, ner[j]); if (dm < dys[k_j]) dm = dys[k_j]; } } return dm; } /* min_dis */ cluster/src/dysta.f0000644000175100001440000000310012553131040014017 0ustar hornikusers c Dysta() : c c Compute Distances from X matrix {also for agnes() and diana()}: c ----------------------------------------------------------- c c was part of pam.f --- now called both from Fortran & C c "keep in sync" with daisy.f {move both to C or replace by R's dist!} c subroutine dysta(nn,p,x,dys,ndyst,jtmd,valmd,jhalt) integer nn, p, ndyst, jtmd(p), jhalt double precision x(nn,p), dys(1+nn*(nn-1)/2), valmd(p) c ndyst = 1 : euclidean c "else"(2) : manhattan c VARs integer nlk,j,l,k, lsubt, npres double precision pp, clk, rpres nlk=1 dys(1)=0.0 c ---------- is used potentially for d[i,i] == dys[1] == 0 pp=p do 100 l=2,nn lsubt=l-1 do 20 k=1,lsubt clk=0.0 nlk=nlk+1 npres=0 do 30 j=1,p if(jtmd(j).lt.0) then ! 
some x(*,j) are missing (NA) if(x(l,j).eq.valmd(j))goto 30 if(x(k,j).eq.valmd(j))goto 30 endif npres=npres+1 if(ndyst.eq.1) then clk=clk+ (x(l,j)-x(k,j))*(x(l,j)-x(k,j)) else clk=clk+ dabs(x(l,j)-x(k,j)) endif 30 continue rpres=npres if(npres.eq.0) then jhalt=1 dys(nlk)=-1.0 else if(ndyst.eq.1) then dys(nlk)= dsqrt(clk*(pp/rpres)) else dys(nlk)= clk*(pp/rpres) endif endif 20 continue 100 continue end cluster/src/ind_2.h0000644000175100001440000000205212553131040013675 0ustar hornikusers/* inlined, to be included in pam.c and clara.c */ static #ifdef __GNUC__ __inline__ #endif int ind_2(int l, int j) { /* Utility, originally FORTRAN, called "meet"; called from CLARA, PAM & TWINS. * Original code had MEET(), MEET2(), and MEET3() in the 3 source files. * ind_2(l,j) returns the *index* of dys() where diss. d(l,j) is stored: * d(l,j) == dys[ind_2(l,j)] * * MM: changed to work with 0-origin matrices dys[], but l,j are >= 1 */ #ifdef was_orig if(l > j) return (l-2)*(l-1)/2 + j; else if(l == j) return 0;/* and the first element, dys[0] is := 0. permanently! */ else /* l < j */ return (j-2)*(j-1)/2 + l; #else /* from Li Long -- optimizes particularly well on Itanium2 */ /* max_m check by MMächler: max_m is the largest integer m * ----- such that (m-2)*(m-1) < MAX_INT : */ #define max_m 46342 int result = 0; if (l != j) { int m = l>j ? l : j; int n = l>j ? j : l; result = (m <= max_m) ? 
(m-2)*(m-1)/2 + n : (int) (((double) m-2)*(m-1)/2 + n); } return result; #endif } cluster/src/init.c0000644000175100001440000000557512553131040013655 0ustar hornikusers#include #include #include "cluster.h" #include #define CDEF(name) {#name, (DL_FUNC) &name, sizeof(name ## _t)/sizeof(name ## _t[0]), name ##_t} #define CALLDEF(name, n) {#name, (DL_FUNC) &name, n} static R_NativePrimitiveArgType R_bncoef_t[3] = { INTSXP, REALSXP, REALSXP }; static R_NativePrimitiveArgType cl_clara_t[33] = { /*n:*/ INTSXP, INTSXP, INTSXP, REALSXP, INTSXP, INTSXP, REALSXP, INTSXP, /*valmd:*/ REALSXP, INTSXP, INTSXP, /* rng_R: */ LGLSXP, /* pam_like:*/ LGLSXP, /*nrepr: */ INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, /*radus:*/ REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, /*obj: */ REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, /*tmp: */ REALSXP,INTSXP }; static R_NativePrimitiveArgType cl_fanny_t[27] = { INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, /*jdyss: */ INTSXP, REALSXP, INTSXP, INTSXP, INTSXP, INTSXP, /*negbr: */ INTSXP, /*syl: */ REALSXP, REALSXP, REALSXP, REALSXP, /*nfuzz: */ INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, /*obj: */ REALSXP, INTSXP, REALSXP, REALSXP, REALSXP, INTSXP }; static R_NativePrimitiveArgType cl_pam_t[24] = { INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, /*jdyss: */ INTSXP, REALSXP, INTSXP, INTSXP, INTSXP, /*nrepr: */ LGLSXP, INTSXP, /*radus: */ REALSXP, REALSXP, REALSXP, REALSXP, /*ttsyl: */ REALSXP, REALSXP, /*medoids*/ INTSXP, INTSXP, REALSXP, REALSXP, INTSXP, /*optim: */ INTSXP }; static R_NativePrimitiveArgType spannel_t[12] = { // ./spannel.c : INTSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, /*varss: */ REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP }; static R_NativePrimitiveArgType sildist_t[] = { REALSXP, INTSXP, INTSXP, INTSXP, REALSXP, INTSXP, /* si: */ REALSXP, INTSXP, LGLSXP }; static R_NativePrimitiveArgType twins_t[18] = { INTSXP, INTSXP, REALSXP, REALSXP, REALSXP, /* jdiss: */ INTSXP, REALSXP, INTSXP, INTSXP, INTSXP, 
INTSXP, /* kwan: */ INTSXP, INTSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP }; /* is only .C()-called from ../tests/sweep-ex.R : */ static R_NativePrimitiveArgType cl_sweep_t[5] = { REALSXP, INTSXP, INTSXP, INTSXP, REALSXP }; static const R_CMethodDef CEntries[] = { CDEF(R_bncoef), CDEF(cl_clara), {"dysta3", (DL_FUNC) &dysta3, 8},/* ./fanny.c */ CDEF(cl_fanny), CDEF(cl_pam), CDEF(spannel), CDEF(cl_sweep), CDEF(sildist), CDEF(twins), {NULL, NULL, 0} }; static R_CallMethodDef CallEntries[] = { CALLDEF(cl_Pam, 13), {NULL, NULL, 0} }; static R_FortranMethodDef FortEntries[] = { {"cl_daisy", (DL_FUNC) &F77_SUB(cldaisy), 11}, {"cl_mona", (DL_FUNC) &F77_SUB(clmona), 9}, {"dysta", (DL_FUNC) &F77_SUB(dysta), 8}, {NULL, NULL, 0} }; void R_init_cluster(DllInfo *dll) { R_registerRoutines(dll, CEntries, CallEntries, FortEntries, NULL); R_useDynamicSymbols(dll, FALSE); } cluster/NAMESPACE0000644000175100001440000000465112547427120013202 0ustar hornikusersuseDynLib(cluster, .registration=TRUE) ## S3 Generics: export(clusplot, pltree, silhouette, volume) ## Normal functions (non-generics, non-methods): ## The original constructors: export(agnes, clara, daisy, diana, fanny, mona, pam) ## and the rest export(bannerplot, ellipsoidhull, ellipsoidPoints, clusGap, maxSE, lower.to.upper.tri.inds, upper.to.lower.tri.inds, meanabsdev, sizeDiss, sortSilhouette) ## Methods also useful as 'standalone functions': export(predict.ellipsoid) export(coef.hclust, coefHier) importFrom("stats", as.hclust, as.dist, as.dendrogram, cmdscale, coef, cov.wt, dist, mahalanobis, median, na.omit, princomp, runif, setNames, var, weighted.mean) ## For now, we keep *depending* on 'stats' just so that ## S3 methods *.hclust, *.dist etc will work importFrom("graphics", ## probably needs more: arrows, axis, barplot, identify, mtext, par, plot, points, polygon, rect, segments, text, title) importFrom("grDevices", dev.interactive) ## still always *depend* on 'utils', for many examples etc: importFrom("utils", menu, 
str) ###---- Methods ---- all documented but not exported ## of own generics S3method(clusplot, default) S3method(clusplot, partition) S3method(pltree, twins) S3method(silhouette, default) S3method(silhouette, clara) S3method(silhouette, partition) S3method(volume, ellipsoid) ## register all the method for generics elsewhere in case namespace is ## loaded but not currently attached. S3method(as.dendrogram, twins) S3method(coef, hclust) S3method(coef, twins) S3method(plot, agnes) S3method(plot, diana) S3method(plot, mona) S3method(plot, partition) S3method(plot, silhouette) S3method(plot, clusGap) #exported: S3method(predict, ellipsoid) S3method(print, agnes) S3method(print, clara) S3method(print, diana) S3method(print, dissimilarity) S3method(print, ellipsoid) S3method(print, fanny) S3method(print, mona) S3method(print, pam) S3method(print, clusGap) S3method(print, summary.agnes) S3method(print, summary.clara) S3method(print, summary.diana) S3method(print, summary.dissimilarity) S3method(print, summary.fanny) S3method(print, summary.mona) S3method(print, summary.pam) S3method(print, summary.silhouette) S3method(summary, agnes) S3method(summary, clara) S3method(summary, diana) S3method(summary, dissimilarity) S3method(summary, fanny) S3method(summary, mona) S3method(summary, pam) S3method(summary, silhouette) cluster/data/0000755000175100001440000000000012553131037012662 5ustar hornikuserscluster/data/agriculture.tab0000644000175100001440000000032610400542206015672 0ustar hornikusers x y B 16.8 2.7 DK 21.3 5.7 D 18.7 3.5 GR 5.9 22.2 E 11.4 10.9 F 17.8 6.0 IRL 10.9 14.0 I 16.6 8.5 L 21.0 3.5 NL 16.4 4.3 P 7.8 17.4 UK 14.0 2.3 cluster/data/ruspini.tab0000644000175100001440000000150410400542206015034 0ustar hornikusers x y 1 4 53 2 5 63 3 10 59 4 9 77 5 13 49 6 13 69 7 12 88 8 15 75 9 18 61 10 19 65 11 22 74 12 27 72 13 28 76 14 24 58 15 27 55 16 28 60 17 30 52 18 31 60 19 32 61 20 36 72 21 28 147 22 32 149 23 35 153 24 33 154 25 38 151 26 41 150 27 38 145 28 38 143 29 
32 143 30 34 141 31 44 156 32 44 149 33 44 143 34 46 142 35 47 149 36 49 152 37 50 142 38 53 144 39 52 152 40 55 155 41 54 124 42 60 136 43 63 139 44 86 132 45 85 115 46 85 96 47 78 94 48 74 96 49 97 122 50 98 116 51 98 124 52 99 119 53 99 128 54 101 115 55 108 111 56 110 111 57 108 116 58 111 126 59 115 117 60 117 115 61 70 4 62 77 12 63 83 21 64 61 15 65 69 15 66 78 16 67 66 18 68 58 13 69 64 20 70 69 21 71 66 23 72 61 25 73 76 27 74 72 31 75 64 30 cluster/data/plantTraits.rda0000644000175100001440000000656010432451454015670 0ustar hornikusers\{U;-Eڂ(P@P"sa[Z̮l A̽3;vEY"HU|A"Q1Ҙ` HE(Ҋ呵PR(48s<9Zor:ιw.ٗϖ$ITRFvIIYSW*ժ>ޙZO86zթ WsºWރyp=A{:a=zϛ/w}/2YGlo=isB3sGa._~FnyyrpF3nϏkɂG2W@"r[ϳ|alʿfz uŋu:<^wտg s\>3 )|WXq~L>OŁ|'_0eϝo\Exqzވ^=h~I}:z]vcBFy =Rw4 yn/8^B?ݠj}}} m]Xo?}qS!qiǪ{lЋ~&z8yP4GwG{2`||} ۭ^] E{<\|}D>b\ΝMOA'Цy_OE;~>)7Oxm{??a-'Zϻ^@޹˟9L369q^?vbyx Omϣo?poy>ʿ߾azOzvI2ڇm99x_hgF '/W~ͳ}|nOzWy苲OYމl#@}Wp}01H۴"ytYyA--H||K Ol>GF.NbO?%=(;8B]JFЁr%sA| ^>a=[CS?lj3(aN>T`P^JJrIlY3oOR,:Uk-7%?Ns}h\3xi6W1ϐb-OWXt6sGʹ>`? 
(O80;m[PO[;?NɅ7}'w2fC8L$m7֚_Q(joSIh3=0h5{3#I[+Vъq&#]kS36ӫ:4.Z{Cԙ؉3"Cd7i`->ՉEoP阒0o40vj34n]:@M-uTIKǮӒDrmi [iq➣QxgU9`Gt âPݨN`~b%%%:k#xD~h[oa+߂[@[~ŪyN+w'/`toTO@`2`W[a5f__AW@MCebvg$e9vևmDwY/]n|Ǩ?OQ~$’`;ɹRZ&kqkc[nx|Cߎ[+w+X Jt逭t7:=3ej 7q0B^],^VIE{Cӵv`UTJ=k4YQ*%uOK5kQU5X̖'uuquDQ waŮdYTbxY@umӦ6F^-pjYcdVWIBTBijZJQT,Ў;v*UEBrT8 Ou$bKUӈeUW42WJ:9Ul)a$)cW&WY X՛jQIsшVFjؖJ ]7QQjT'Q3J,UgDmfSB\֪&%,i)j%^E`{k X̨`j !t@Q)_KcXUA$ʴνvq [UXid +6b:4PڈI^⢚f262% \^b),o.QC/u6^Z lzTWGej NP-TF5@T{H~ͮ @ÄS U}Sm>m ^Wlziz[À0-@FX B7tVŴF)+ (C̨C3;,Ä25 <`i,o T[vQ+CTijCr[(`RsgXzQe!X5>b+ъ`q SmU&B)Whp+ C/ٌ:#`j>L^1Yfy[W*\`jj^XFt fP=dX\ZE UZ%%TFTU1 ר3Ԕ3N*PK.Ycluster/data/chorSub.rda0000644000175100001440000000255110417224251014760 0ustar hornikusers]{Sm.Yh^Jdή-+&ZS2C$#$֦Ҧ-R.[#5m{oO|~9V9U9ιLR2\n~w5W1XW|pbE;s`G -F\)S(-CЃa2< A_ؗao]ξ`8ua{aOCW >^e`2~7(=ث`OVlq-`_TyZ@ xEccPmjٝLzr/Xn]˂C7s~`*\5kmC_УЫAl0z-+Pk;Wkù?AE jk(> 6`)[|J9VX Ɣ%x:WcEڋgw~6<%z\NAj]u~Oꌲox,o,Y> *(4͍)4l畹zP|:T|X{rj7Kh{89(/i^۴׫ 8'^/hist瞂8g~ tUszj^uZA=xP-CSs:=SzP'Ti_܋jp+`Ծ𮽝{jכxi2بMt۫6|h(o+ոyV+wn {b8SeKd'S>ۭ*3Eg%MAGdL}/HfS\P3~(V)?3wK?Qn <7.~G>yݥ|wD֊tM.[9ם:;|g$9.kqos ̉ <3:sztSCQ8xw͊sԕ=߰%trsf !P1(uZ33(W{|lsN\ѻ/xq62cMK9`sX^x\ڽ#—/M۔Jgp%‹w8;yNzĽ9 aF'JoإQ|~60Vq>wbw݂2aÇ{wip1qu=rﺋK;XUߒ 5C 918|HʟՈ_hh)n& iM/dY7S7!b{~^Wt-.MW^[G=:a՗WvG &f<8:|]ޖf Gt~Wi̓qD=V7JN \%s.JԈX.w@/}OJzs]TǛV]O<;q{~gSΙbN~~;ᣑ֜G`n݁ew曾:c77-z3,09ڝ?xx6Y۾o6=%븥M!kn.7ݫ倻mVp7^*nِg%$ydc{ J!g:pe`-#\ :ܬW}^#t"/Xpt)MfI9N|%Uy= [껣M$i_NUTI .3,2GNf}W")w=sˈ#'k) B̓eeI:NT nAͤ:[[q͍Eg}\ YTA`Jƣh~8sWE6BDַ>A%8.ĶZ/i i)nɫSUj07:>ީ3Eo##]e@@1[ ~eNy^Ҷ\"7\h+C RpAFQn>rBQlkq~ayƻO5' \[_fo}sxq9,==n;ĈO_듳|v-Dľy1'8>.btDImVjD!-̜fKALŝϖŸgV/^ӧdiIY홢ԭ:xas:\0D~k5Ert郎!o.|ٙӒz6E- ” i6_<;ξ_y+.WS6pIswV:3FxI_S' R}y|X}flRFuSbtHy?q% Xy[8 WFZ<}"' )<74Z*uEЈ3j!NwΙ5=w9ä^-^JPuyyэ4CC'iV,&֎m.#} v.vU}ښ9}]k3 &&Q24.]4=C2]_Z P}}EfIm<ł~kw;хL{yw͵ߗt ܙ?F|utׯ7ڶyFa>e}!Ya/)o)cc|N~1lbR]pV/x$oU0. 
a5<oՅV 5 U޹ߵta"xg~g˷N n#“ ߕBe_7ԙ;19V >h0mg,xGdC:/.Or@El'skqG_8}FungWv*EOTW_LAC{^y g};F> o ^0?fi*5}c]B\ sOb&s#v!vwv>F%WrUl@٣_;F`s a $^(|OgCunf/]Vg"]gw#:l[z48n.\ ^}-ji3M(C3 ^ؠkӡͥWf P1ݻQ^]]~; Y'@@ݖupH|!iڷ]: #潞Wŕn7׻}u 7qT[`#yG:Hע"y\ cD_L[$1A HD5jj2ߘ|x1,X. >ЍG䫬Y8=)_mMBtÛ?ߜ&L]+ׂ  &xhȁRU혠3ٲ_qzz|L3O^U|s:g:zwiky՝ۧ8=Ϫ}q3L%@'<ԇfߧBh18_!aq0-58} !: gn9!?jRx?iiJ ([Ŵh|)tur2, c˵f\Gd^Z {ÃxJG|693}|}k?* 6GF;ke; 8:f$v=kԤ9ÎMr㶆9S=WV$J_If8% "4<ӏ]`&Qq)6X\3>] K;΁qZvD^ *mACGmMCS#4pޯ9\Y|@4{m81p0a3 +ِ&YvCn.xm+ Õ&g6}yu= >\uCm&OT=f~A #b-w0n]tN:7}hQދ[.Z qW I/U١ թMΧ ]U5qA,.x!M* Ǯ=ww< 3 ;Þ ;oJC@vV"q |zw $蒻<ΟN}e&ÖFM>;vD5 NG[x Q?BB ꦢ4̕Ӽݻ/jc:K$.<=+TKhd.wMG.ߜùqdq%fu=|--SrL/ ^j8EOnDF> ,U:׋;߻徇Z[ }>3LŠ|\SCa 6!<˧z>F[o=O.|1di>b?SJN@\@s] 1˭_׵-kuFW8m y@#뚞#Q[h8qu&>rs5x52zUNO羞 ̎}tx4-'”q_p]7}rel9~(nsO! ޗpl?7KŅ/ ՠvCq6۴wM!tl><1 /]&O6=&C#ap|Sۜuy&B94pjQؒUt[Ǹ,ӓKƟöTU˛ $QRo#цpMw̳@הCY [h+r\16'߲\&~)iN@鷛/ 0Feϩ:>n",~Y[^bfZ:SF(Gb3Tn!"B/[(/_P5 ثN?>kVvmXi}Uyk?|uG8sF}v%a6$&!Kn|; 3+m<+|GRͷs}^B|)gb3C;ɗ67їG?Bz;XE60cea؄"qDkypo:\&iaU>c3vwmji?}PhkV3z]T8>F9dY:ɏpo|.Q.#OBb?"F{`Ylݣv;U/XD=j}vksIs_[gek1\S}Tgaؐn+;o~#^[t?d/KQ$jp>SIK:!n+" l3ŝAARR(+*y>pqd_po:#(p~5>!~<¯h^{.Yma_< D ŏƫ݅ʚ/^'f-9 *VapXkx_Uٿv:0g8%=OS6 e#ЬKߑ6<"I!|?ٟ}$",U\ IQѢ*5XNݖn6PZDXnHt< {#r tm~dlo4 Aҙ{%aB)e%p0(Y`P>Uu-t=:uAZNN \}遪30JAuyMo7ݘ5WѦt|uxEk7G`b=O'(Zv@x ,Z7J`v[gGy=9}~ xM3l=_VVPdMۂ:fJ5SsppB듳|9,u֍ p[ۑaJC܃v&G(ADk֏ڃ65!S2prYߞѪ16>: nz`@}Jvuy~\??C+;]TGZo^Q=dIZ0?Ry67XzFyJW?IG/6'uν$mEr@M [>-vGg-n͈_6NA9*l}oU1{bɧ1e-?:ŕ}%?-x/ ~MY}f{{3u?*f!^図"PJ C$gR];5f;xq m5B@xm -}='m?IuMxy/$W"U1.Ki  ݉@^@H &mCm}񳖿^,bmy{ Ɵx[Xyx*E )H6!)H51{ ťuBZቯ @~^䈫ÿ=޸}2׎Ae [ү>N? S zn3z5+H˘#A1 MEnHBrN~[C<ٴC\2D[B7g Q8Gꂥ6x '$Z}W Hb겠do2AڐD5z٣7@S &(Wy=xAjun'cߐ@JW{[@yQJ!W6pKNЕ~gq(~5gՈxېϗV2.h!^SGNҪޭ~8x?v`ئ !c82j!#»W`ԆmEa16L?^;]zxpԨ3! 
i7Q-|~l.QEQ+3m s,)*@zwW?>- lf#ݦ z> RO~r~G} Rk^6R&G?Ojͧcn FoC2㇄|G?T%H`]*C$o!b= nW֕SnkI:oR by/|;RE,D,#/Al;%'z)YO]NȔ3AQIA߃(]nIG؏}hM Ytc g Af#YAM`=~ G^M+BX* Nq7*=d"UoH<4\7G#aZ.Ԉу QAQG'FUo !w Saرs&t  A҂.l / BQZ.jo{3H?~':㋎1I#Ɇ9́1S "y=G}$v-R[AW[҆8 /7.I*"+/CוjfϕHx,O߮4Cb$< oF5c'Osw>/ X(E'-d8"io_w}$H|Iҙ{H}c~,rtkgS%`N_ ذyP)m#Xp&x$yO~f쏽KLe/w=ԏ#랴+3N=!' =ZJ!A4Hm} x^j@TRE] s)AE/@n'SOa VҚz( 7?JpdDpXz7]!:э+ d"ma%rT#;_pz?˲3m#%o3!p4\Eh! b-}$w?yt" iz #UJ0m7{)iAW]B{3-NxirR)g8C4|SeyU+ K`nR/DP>qvq>LXyxw& AKS ,oF UAb;bGC4o@9ic$oR[Q8Y#Iܑh%7!4桉A$U\:]4C##Fl{/{14!di굿C z#[/2~_\H ato3_ƭ>˖#ƚF`dhbhO1{.w8~[{)<:i#+AZ\  P;g^t"mAGa'ɿLJ~Qb1k ɋ| ig!vqYd)qu40g t0O|o*o&ugnĽ@]{RR &sqo:Tat 7xqoI> hx#VJDj)Y TNɃTAX30xH#5)ߐb&?$|`حe1OxRqb+!5M[@g̈́uZ ¥E'/g:;"u;ǤT Bz H>~y0n5;ެ!wqJ>SH*$=hR3nNsbsqHf~,u AOA`L/!q"RMd(8@|s.*Y2H/bnXuR h#5+`/+W I~mL'⑲)* Ώ@0G[8Û#ׂJLbizz=&NuͳOXޓ,I DI;?2_t Ĥ;4>Bn|kXYp2ܑZ$*ۅ?!&E{[o.l^!߯UAx|> FLԒ *_ 7?xsO iŴQ$=a6Uw=_҈kvwΟ; vpBIG`゙+x /RIYNoڰ)dRmD_~Ǔ^t6߁{[wޯw]G07&I,#-8a F9 A)7^Jo/OxYĥjJv }RyߚRIO~>IүkiTV$ql+GvAJGuLשٌ_Jk/AG"(4\Dm$oW ޜKm<5CPUpQd$ gˈl !Ks\Ʊ=`|/hy#uZHzmziFBbT37h8 bӘ_KЕacB2\ўv}!bu"bu"CLSYfH3]1vbPVTĥlNF\xh1U-oT)vcy+i.)3 H9jXFAX2#= 0! d \5Ԯ yP1x̞O*Mg|f/nlpxZJ#6oLtk5oUtKSbQ$(}]E!FQl_%lO#ag)لwufzY|cspzcc1@-$]EJ%X~wjX.!Bp œUٿ5ӣEs!podpN欤Ye~!$7[9'9ޯ xˏ1̏$msm +rPǝv.\LNۨrH}r@ԃǽj *aS!Q@ĵd?z/.7ߏ$W7snROl0lwiIE#љ (y3)! 
- atLkAv,Mbz _Ŭ_"d~cR{&t4K^6GYBPx%jCXe^tMO!ЂKWg pʻiiI=m?qZ:B3 .̇UɁXBC$f8_BJH2]̻z| sdz8Y7 YZj?m)qa>/'׹¿b槗LfxC4Lg9bB$)Y]ԙ W}D3b3*^N8]qGGקTب~nBNE#?j5ʖD/sKwn# fO=*w3]'pSܳ#5 *T9mDml>^u%m6N&uU#![yPqV"ށHWMGIL=jrlϪ5R0߀ٛ0pLټ Ӌ%6`elD;t{!;"¾L?K(xV?!)g%M s=sH-"kG __u#wk'1?p;:!O\1v^X07EQL2Z"doO ?{iV7Q;Pq?6W$a9$DB1t'}60%o 7o+R0}k6@ p0P[oJ/2}<W7"6:6%OP[t;6#N?SkO"5r-MU0H?BVolgL $ezOlkj/0_ۃ Wr~>tj:/.!DtL]B*#&$>`+;G}02TWqفv$D:Pb~.btx0}OӚ90pK[sCFo=J,3U9!d::~i+/6ȋCisÚȖ1'.#OU/[e}k;2BI窗OGF S䍠m׮ܱ|er(mv+ZOX"ixCOFm0k&-<ݔ93P"6ge톐ckdc)c}X囚_B PfeІ0:ω"mÓXhW@jh|iC3CeFtu^y,(E}TЀ>&P˙\Hm.|PW"-\cϤ.'cxM~i[ 4PGz)z0m9z,T 3f 'LVZ%F73E ^B=}qQol¯ɋ i3Ukmƥ@me,'}Pr@֟I76ochzڬ@b֖/AdS^#dpd"_Έ3dY&D'+}(ۦV&$:<+H꠪#, e_4)Q--<*sV"(d 7K6,5[Ԓx;C ~%2屼 XL v #?/!E ?ڠoU q1L1P yȘ{t̪@>~OoWy^(O21au-cį+S-addW3(ҘRY{Eoxy z)2ЯWx}t^R|l;} бSddy]A8n*n  vϤi.)B?PgCM냩mt>?.bC=(?>msA6D _#EݟD7 kh#t`:d's:m8 M'T6L?6>Ozt(1Qٗa ]@z'B>Gd~Q: Ui\JCfCM)Dz7LLa}@u~m _VH uo*U6A`G'mW5ل>*qQ1_:ޤuxȯ *hLq(LS Tz? u+S3W$g״~e'̷, c~o|[}H^1ƃ+<^~cM>Nk5}$嫔_Ӊ |Bh:ܭy| +DI@;(xR=9?_,ҙŏ#\/qǮrbBƠ?AUV. y ܗk{ 2~u.f d/X=Y/RbN^n,TWF XNBվI+?xVP|c_wUJR_P^eQ Ϟ*vd%3YOҏBF=4coZ ])C"%BP=F*iJҚp>T;Ri=l>sPX1}N޺9 =F'|D"3ߖNСӧǦP≬L>{L,UD: ] d_B6udmlߕYK!V/pzmPGZ$hEB^N fyFd,,ObfvE/J Usض(| 2իbH3hSUg8_yKXT6銊<*-XK%e8DՊ2"}Z [X|*ߔ GB/ @y{3R)&A-a*cR烻^bzO_0+âZY6';_1ͳ20.fu?rf;y4Ef_%XʰGm;09oPW1}U*;-{ *SRxi( O!Q'"kxТ''O .|-(]s< QDWVCu% H}Jk߁ij[l `r3'͐f~ *k=ۘoF:!f|E+&!ǬbOq}Ҟ h~.̅")7Q9R9Q 25Q`GʁP4.;ϰ}/=Ivw[(ʅcYj5@P ҷϪV1q !Cg}Gu0wKHͪg q0M RSzEd~"S׬2f҈~¡|oQ6~H[@Ǎ{uۿ;5l~B8\Ve s 86$Y| Y- 3]<ÿsё::T6 LtZД L Vf}mU 2O1|e|C3W/Ĕ +Q+R'1^YU։6p- WY*d}ja]+_C᭿2mBw4LOȘ>jQPa~Tc'Qg. JEH.6\gJNij#}P}Hb^LOь`(62+%~DW#c-4R}YܪԬϤtMuW\-fL:q3;<憥nT6œwYAv5*]^-+Ǚ5&'opSE]4$O nT`f̋!PϯAYE44KuP.bzI&B^;#YM Nϱ%՗KM3vԍl=MS/Mv XÑIC%~j%4ՅA[#ËgP' ɓ{5ّ<; 2&5)a5OuzJ{U4~u[6ee0_2 5!N<,32dCI1uJ6oR/z {6Ԏ-CfF@[Bԛ'aPcx! 
֓qʇyie:J!}C4g>tY 8P.G[BVu-$nX">(ʐf8OsnI!h|l0y '[}- _ M>}|yV=pPc/W*3yזq V:0RDO#.MiH;Ҥ?P;22KJwxCTk.{_xF(Ro##ůXߜ=7d{h _]y7:6 *[ϩDljIgOCYKՖE/%x5hx؎F况YpO O^`J6?Ij$yP7lW K3|6 ȳS?W8.t,՛$B9U"Sd\oYn]G:gb>ߚvkLӅ hrY?S̕myՏ }!3􆦢2寣jdd|,M/lK}n`:FFE')-\e^DΏ>3+˾6!D] )SVqw7{w&J<8=g3yt'P{Ζڋ AAHC!4zG+F]ތyK~lRk'_ \Q{J|aQfF|x\=%q𛔷;b1D)xw=Z)jTX8Aۣj>7t4L=aflC]l"[45dа(&CuSNIlp=ٹA]ΟHfu=fTosQ;)1{xO}t1FKh4[s 0b=[0ֳ7!ǖcHU_ف 1p+j4^LћB3O!#x^3\s<-o{oJAa 펁q*n= Aub1P5%3 ?n& ݨP^zPob`)4oTPxL> lj+a@q-c;& ~MGlW>P KG?Uwcib?aBN;X h_nt`J\*wS _'<>hj 6.@(PtH3ѩIWw_n|(?x;F,QGVc6gOvu]n[IuΧf׊>,5*pb-l$VP8ku&[ kNɞߗa@ob)P7 ?zhۏ-L:_aA!{̏o]қ}=ޒ>\>nm&rņg-讋իzP$`A\!қb=ڼP-!Q z{_\W_B{Z]? k5OW34'5:n|W粋426>9sPA:p?z Gf 4|[Sd @J}7S" |@߇Ao8[ ;f5n'?M[ho&$k<[wpVc!z&JwMǓ!r.{Q_d @u8*`Hzj^|G-pTs~8`Ӻq(c< I_Woϒ=->h~sS{$K}r0fh*~}Lkk :/<Χ`m4r*k$}𫰞@y]-YK2W/wo­eݾZ >}lf[tY3 ?MW׫7˞UzM ~a1V9qRs?D~;^|GA+HXw#0Ρy,fT:CF '[&s58ulgX),q͹v6Twž{P$t^t?F9NgwB;tvR?-|a my;xF5QٗYbnriO~s9ui޿z]sb#'o$~3A:_+J(}]6{d7hű< sE4cIߗfk Փ!kht<-{va"TŒmoG;*SdHKA&Z*ҫc{byg:!rJ_sy3!z JǢ3*wsƢ 1?ш[䋏2R])C-uC) _QaIG2t2ɟǛ #׻1e]){\iϊ? ]B:wIfuIoұ{ƛpuEƏ&5vWPkԗAdxI}nw~li~ϴcҬ ZZx"g;f 3յ5X.]%xIA{4v{߶]W A׹Ntw1?~#4I~H>߱+B hq$Cz fd|rYWG'VK W]}Oֹ!M&Ms; >i5%x_<6D\B 4}?yAB!hx==H2;lYMps˧-!3^A‡̰;[7h)aߐ(ܕj9GF7ܪ8q1@Ľ9&;Shw!SB vM._k ~}; kT-)W:;.Cxݶ.)/(>;L(bvCl;ڑ"06o=S1Z~] 9MZ$ȇ|wn۲1sE# "z1CDw4,uvٟN皮ZmgN8}M{q0i9_-G)ϫ:wF)Ѧ.bb ͨ-LR~ b\1^M$9'8HVyNfCf\=KKkWhU-Xo{- j*$wSiBg@PJP1#! ɚG߅g,|޿ԳsNGMPn!lZWKk>D?'&Ĥ}|toG-XnyzOw ў:be# MAW@auUPA>zMNk ݷmr TgG Bl԰S'\ko;tQI'Y&/m^3l?i֢yXg:3(?_H{Z^'=f7UNڮꆌv1<6H3P ۯ?qin)::c~.q-50}D7 3'-kfc+!Ϲz %򫳣\t{>0df;>E__d &3lI=G$p? 
cmamYs}|Rfnʾ ?3!']kG!v^~1ъK ޞ_ZM= FsVQtqo~'!ZKLn3HˎN|XNJJ.vCď';F\dO^^y 7;BtOoj;>jp/+C@zd&_n!'^EwyOd ]Uz9Yk9Hp\y<D?f}<}= Uj[g ~3X Y%)Q+ )~ܢ~d=}=J7S1xUzwJ!ًNܫr=%9'Yi%g@G_n863|;`= Z20a$nFͶAmk[WZ4:қ2YyKٓnF2˘<DJx.*Ypmё r| e#!N#P@#+^u)j \}~" y^#3jN2;$vb2pq)OD20/~6 E8k̭:y噁MiswM`/pqS;1=ɏhM;G.qY.kCFCZ_Щe {KabAhA`8KݤNQ1u02A&;CxAG&~y3$%opGϑ^$FiZV #6DnsnzYH.'}/:it+"̃ޛXhMӏ 9 } tƶ\ ބ'[@&g3amOûBz4>fX<6\PK'-8ΏtrWE!S>av0I "PBGIfxϡAmFy`ƚߎ]A#?%wU҉h6S^P,[q[bGN8,!/(Қ.%  FAJ_V.DC0xRPM⹫`M0eͶ,2r# FOq) &.`\e%lօӈSu!iDhR3蛓~̠a^mBAx^9UT 'ŤCKӏ=m_X׋gU.#, V('߸* KOV$gAWS~- (q&a2>E&4!S/-@Qr=wp8qsWu_oF~.D:nWү+SCz>btL#(H I$>Ѫĝw&ݼV|/uN|Y%tץ)'}n? :g5$^j9޵1g@ӨV#qt܁@\@XМ->VF@H4Vt@}+xR=q[Sͦ9I9 bVl[ٺ 4=ϯ]J\}+Z3թ]~;Pl>Xk JUa fOyeGHNE#,h-OWYh#!ߑPf\ȶЕ!w%swd=h ]̀`,xYM#߇ IF<qrhc$v9qU֤ϙ3]UEf-GЍO(E:ub66տ`LZ q;(>UA5Hok?4?!; NэER&RN&ñBԿBf+*oR4m8&)"Sl0(gR=/,Pj,ANf Յhy@E^ѩ}3n#C>% BhlY[ ޫJ.9kTn<}~`(ę3XNڜ5A'OÙhN|CٚPb5 dbC8H+{ƾ7|-y~RG~°/ RIu:Ro&RR̄ךgUO|F3(OD}P!}:=V j_EȫEn,(k$>Gi;=ALnuD{4*pcs>Z9PÉGIi`pb=MAѝ|T)UcN1=^(kӞ_rNx P3P(Kw!Pǯ3]uYA w!G| Afc㥷![G:WC>酕crK_SWFa ձ44})gsP!=.q Q^:+A[Fu͔KBh-<l!h< Rb]M:m2f5hINCz N}@:UQlE~El*gf?N9%_m]"P򝏡|.fUD&! [c8&ӂޝ]ڱTW*LH(|x>w!vspJ)EG#εzSH׬@sG. ȏAb.P?3:~T^؂42 kd,$v 0;(o/^CuZGϛ R}]xhB+>w¾l|K)&IUT-b=q-_.x=R|uiΐ_}\=:ƕHnKcЮ9 c hSΒh {Iߡ:K4U33hc@yڶg}Gܸ -ĝSP c @K(PW+~_=3u?ȄPPٛ x-ȿSFa0WهijH"^ ђsYxAEafA5qj6t(R}M0FU^&߸փL? 
ߠ^CyF>c_VC>*Q?1;(g(|K;Go fԏe0tXC\1E)kim7@,<@^$b9oē$}JAq;k+]@Ku7˨qrBytI/ cx&󤽞f8kCmFޝ;ržsS0xP׏oV\ɠQ稻ڡq?Z;ѴP:5U,-)/rU ߳p,/ TV O/{MP9mS1?oDyH!|n ڝTo)ҹ,KR@"6C)oĮGsBneؒ>=E\ uW|H>L y}`bؿ0'`k@u 8 TGi .Zf'I(ckY+@7*,*<=P` ?Fl| ]4OF]A"}k9^@>Uǘa Q%[Эw|Gc ?ת%pNK@{ma hi@38E;,?U[s@6Oy]AX!ą׮cxnN~\ڣB۪X\ȣХWqR'O~;?f %+tj9{>Q[ZM'tBA2w۲.\Eu =ZB}q~T˨#)?:Q;@zyU| נyv߅ڤ#aeL2)SU>(\aM9ڽ 9oⴣrhPna}!c 7߳Iw)9${!݋!!Ļ]=5e#h5j^;y+ƿx~߫NGv}-(%3jWU񇬖K WT ɷgxKs:M#hGOg[XCBʹRypu63%8[9"6\X& TjԏV@~4Mс8;hs'nn;;p8܊4vao7POc{qk׭xR^Ǖ'7q6ur)(ޟZ^CS4w 61# ~FPo@Vb@9$5FѯSUpuѼBu ߞxBOQ}lC\={`Zόsgе#OKh@`{I{$υbT)5Зx= 뮠"x_ >tlXo< (OĽЦK(}|m#+@u4Ub;Q;'@o'ߛ1mjVJWf9,0tH_EbݹCE p*SriNdO[E4q;c)WJ- =]|B{a7g=4O652|Kw{@T|_<F; @R_`D`ˤb߻P ڍNR}[Zpk(Nߛfv uȟ?L;5H)_)7G7b)[4/qڒubaOڊx^A\ t/dEߋk3*U-(#8' 4@T)XcֺS?}a_z2/?Nj6\ H[]~^7~.x_Ɣ_iBt7> pu*AԈ> __z~Id=ھô- 'E_eC%[ZFK_M!ӞDM{([PF +u ATXY,> NVC]ۖ.x);y],Z +}"BJ)@]G AxL*NWU۪ʧ { MWq-c$fI?TtT(gRZ>'Pt#t~OfˮFb$RPNf@:J!洆72{H@ews?*Cˎ;=K@*.sAWV(i._Cb^VbLdkh&Ӿ;+aM 6~B%sBuOgbE'}(׋sqV $Я|M;t/T"̨طD{jNL0銄9T\A9, Vjr֎ΒF/Ľպ}DѼմU^v#kC)?@Ս8$1u+oRνP6^]oМQXvgA=36bЕbk%cF |Jww}@ [@;U\L QԿ qS#,ߩRnƼ A?[6eAwz(Jt^Bu_NPeGujN/ҴzN à9K9=ĿS}cԷ"?A6ףz=kn=4Q/c˻&L@0XGM+q{1f5K<0xklPflZ*UjϢ|Y)s6̓xL9l׷E )CW]?>~7yN0JJ9?CW1LZ\cG<\>><7CsK?D,x!Q҃0X:O}-Nfqs`_Vַ18/=1pJn~q'ggGG=& =-=xPBITrSR8hLsa(eT8g^c- &z`̥-:B㪳qgWX΢Ĵ<=y6W->6~h 7^ȴȝ\%?k_^~OzLjoL">}w>4`]3y᭸8G"+ vvY!@мٳ7zS-Z>q 9]k@NwF;c\!YWt^c|?7 '<״1 c/{]w@5|}/&[swƟL@j8L<:iNuxֳ -İ}[ íˏ`ݒӇ=1X.ӧ&zs5gB|9ygtzF^.7o< m'|~( {}6s9ӭƤ2b*# Bo<\Lzu.Y.M ώ: 71S |~~~C0%A`[aC5|w,\#{p=.ƌ|^vc hocִt-a/9h,DG_5e7Y/B-jyar/Ǩ\wqwx*i_oa#:zx@;ڈ=ZG|/~/Qx7\cJڔ/X Vݸ~LR1xƻ/y٦ʟ%ң9x)N\DO~M6v] jkaB IRfG4}|1eW](=LzsMwwz6<M!=[1!e o.alsä_|nfz}_=&FVg(C=5ΓX6pY5։s} ߴFIk?< CZsE5RO6cr"^_ڱη0P?z5HmjAbX&4?r7e3ZKs06gTs/3z#4z=l}68/sa 9u&8|"Cmy Fi8{:tt4 vĺzr{Ig1qHU=vk5M2=12C3kZoxZ02 ft9ioZaF cLXt ?t} &'sI?\njz4>9;;d)8t2-Gp [u]la16}k-MO݉ ˽ɯ0!cЪMJV4m Lt;-VrԚev>K9e#(݂Qި82~~+~['K䅭*ΦrItQo›`Wͦ ky%w_<+z$^?;29Q{txoBQ8Zr$:.rJzo7:'f97 k=vn[ٗϥPZx Ŝ.|tsy7Q-xxz?pUrݎ(z<[۾o>)+gK kasX7wKk^ 1&׳aqfxQ?~N_:s!-#[Gvgbkh~Sݻ+(CWXvBC̩߅.̯t]3~_Ph7+τIZ/Č 
2Xw{a7{CďM;oM:W٪g?Gۮ&{qv/ϧ̷Gf;V\E2=\r GL|5y )ƅZ`r /Ŝ'biς 9e/, *ƌtsVL:CgZ~;SCpޭ+݄!ݸ.A:C}B?Ji>rVG`S,]Nya6:^?,B2=˿ֿQe0z{&!tme:1֗)1֍1Efxގb}MztUw67nÿ<6OT{LA5|>>FΆ7n]x:'PnoSM:he|a:z!S; z< do:tp< (>7CD;+xS?|V[&S%0^cfcr{{8F6)}rdH~0;1+zgmZHjkצϬep1vˡ׀Sj9=(]Ź'(yumyF4W}m/^~W\wK0raa\_.ư?1U\CYon8”׬ LPN;d9GSO8z+SC2c/NG7 ==-'q~3P'a6畏w3/09{b029b5H0cŲemo6|,XV́EMcɟvk fcߛc&>+. pO>'BG3KDGQܯRba`GCՃ(m;L>usDgO&KXi{\4{Z1ϡ{b7mtIMV2pݱa@uvN*> \Z!'o2"/3/}9Pmq%久(!vj14];q=#Oof#0ȯ4X86!p-X]//u\!?8'δfAvEG?ncLnB؟< _ifo̶H+?5sWv+V:rL^E}Cva\~ʂou*#n{5vǘ0̜ 0>ڠt=DujJwE9Yߛ?*)kKM1b'ĭ9sNc ϕA8O ?G;g1n6+0_>WÐ+M~pֿ2b1wdGi/f|%~|ߍN\` ݌ߏv."> aO@#1K[Xwo,;#{3Yvl3 ҥV^Ym]HP Ҽ2 Jp}&s[_@!: usV? Hy']c>~f r9oeY\ObAXV.8Lš뻫埏717+7o8f=:;:3' ՙ _11 k+{vൔ$:L轿oӹ;En(n]}S՘$`yzXMb{:>xF[ûg qNK;M {"jJ;q91:sJGA{ҏ?9,SŪ ;s4fm5Ȼc~ڮvzUk5aHL_m%1.CU[.`Xˌ:c܈}~"w<3 =#WKۀNc\ T ^p6fn; sQvrX{oW~S"y.5&Dp ۄaW[gfTv0`ց#u1fEțaLm ?/X[c{ -wU\? ?2'%Ɵƴ4}ӋnrzHLƂb~_r幓+/4'|V!v֬Yxn/s \,ʅͭ̂|0N8 1p)6}ưY¶01əYUѣu}c/e^7~eQQE짌[vkU}[Gu(Rcluster/data/animals.tab0000644000175100001440000000111410400542206014764 0ustar hornikusers war fly ver end gro hai ant 1 1 1 1 2 1 bee 1 2 1 1 2 2 cat 2 1 2 1 1 2 cpl 1 1 1 1 1 2 chi 2 1 2 2 2 2 cow 2 1 2 1 2 2 duc 2 2 2 1 2 1 eag 2 2 2 2 1 1 ele 2 1 2 2 2 1 fly 1 2 1 1 1 1 fro 1 1 2 2 NA 1 her 1 1 2 1 2 1 lio 2 1 2 NA 2 2 liz 1 1 2 1 1 1 lob 1 1 1 1 NA 1 man 2 1 2 2 2 2 rab 2 1 2 1 2 2 sal 1 1 2 1 NA 1 spi 1 1 1 NA 1 2 wha 2 1 2 2 2 1 cluster/data/pluton.tab0000644000175100001440000000224210400542206014664 0ustar hornikusersPu238 Pu239 Pu240 Pu241 0.126 75.804 21.204 2.18 0.133 75.515 21.408 2.24 0.127 75.175 21.668 2.305 0.156 78.872 18.428 1.906 0.503 73.317 20.223 4.128 0.113 79.116 18.548 1.69 0.129 75.751 21.162 2.26 0.124 75.326 21.557 2.282 1.022 63.287 24.493 6.99 1.412 59.553 25.576 8.027 1.533 58.688 25.719 8.279 1.534 58.758 25.692 8.261 1.437 59.728 25.146 8.377 1.439 59.544 25.126 8.569 1.375 59.877 25.128 8.428 1.153 61.182 25.1 7.802 0.201 78.244 18.488 2.351 0.176 78.166 18.629 2.365 0.239 74.254 21.515 2.901 
0.102 79.84 17.872 1.674 1.07 62.455 24.656 7.512 0.851 73.189 18.285 5.597 0.125 75.968 20.794 2.407 0.142 75.957 20.867 2.341 0.352 72.885 21.718 3.618 0.351 72.907 21.721 3.601 0.346 72.919 21.713 3.6 0.217 76.089 20.225 2.556 1.068 70.129 18.573 7.689 1.171 69.273 18.633 8.3 1.213 69.147 18.64 8.363 1.226 68.294 18.869 8.826 1.111 71.076 18.122 7.248 0.183 75.714 20.75 2.488 0.162 76.15 20.345 2.524 0.113 77.845 19.108 2.275 1.309 62.382 22.754 9.311 1.638 60.112 23.32 9.972 1.589 60.519 23.128 9.97 1.411 61.585 23.133 9.339 1.457 61.332 23.239 9.321 0.397 72.291 21.761 3.836 0.328 73.451 21.429 3.419 0.242 74.888 20.939 2.875 1.367 60.507 23.603 9.839 cluster/data/votes.repub.tab0000644000175100001440000002156610400542206015631 0ustar hornikusers"1856" "1860" "1864" "1868" "1872" "1876" "1880" "1884" "1888" "1892" "1896" "1900" "1904" "1908" "1912" "1916" "1920" "1924" "1928" "1932" "1936" "1940" "1944" "1948" "1952" "1956" "1960" "1964" "1968" "1972" "1976" "Alabama" NA NA NA 51.44 53.19 40.02 36.98 38.44 32.28 3.95 28.13 34.67 20.65 24.38 8.26 21.97 30.98 27.01 48.49 14.15 12.82 14.34 18.2 19.04 35.02 39.39 41.75 69.5 14 72.4 43.48 "Alaska" NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 50.94 34.1 45.3 58.1 62.91 "Arizona" NA NA NA NA NA NA NA NA NA NA NA NA NA NA 12.74 35.37 55.41 41.26 57.57 30.53 26.93 36.01 40.9 43.82 58.35 60.99 55.52 50.4 54.8 64.7 58.62 "Arkansas" NA NA NA 53.73 52.17 39.88 39.55 40.5 38.07 32.01 25.11 35.04 40.25 37.31 19.73 28.01 38.73 29.28 39.33 12.91 17.86 20.87 29.84 21.02 43.76 45.82 43.06 43.9 30.8 68.9 34.97 "California" 18.77 32.96 58.63 50.24 56.38 50.88 48.92 52.08 49.95 43.76 49.13 54.48 61.9 55.46 0.58 46.26 66.24 57.21 64.7 37.4 31.7 41.35 42.99 47.14 56.39 55.4 50.1 40.9 47.8 55 50.89 "Colorado" NA NA NA NA NA NA 51.28 54.39 55.31 41.13 13.84 42.04 55.27 46.88 21.88 34.75 59.32 57.02 64.72 41.43 37.09 50.92 53.21 46.52 60.27 59.49 54.63 38.7 50.5 62.6 55.89 "Connecticut" 53.18 53.86 51.38 
51.54 52.25 48.34 50.52 48.01 48.44 46.8 63.24 56.94 58.13 59.43 35.88 49.8 62.72 61.54 53.63 48.54 40.35 46.3 46.94 49.55 55.7 63.73 46.27 32.2 44.3 58.6 52.64 "Delaware" 2.11 23.71 48.2 40.98 50.99 44.55 47.86 42.75 43.55 48.55 52.94 53.65 54.04 52.09 32.85 50.2 55.71 57.71 65.03 50.55 43.43 45.05 45.27 50.04 51.75 55.09 49 39.1 45.1 59.6 47.27 "Florida" NA NA NA NA 53.52 50.99 45.83 46.82 39.94 NA 24.3 19.03 21.15 21.58 8.25 18.08 30.79 28.06 56.83 25.04 23.85 25.99 29.68 33.63 55.01 57.2 51.51 48.9 40.5 71.9 46.83 "Georgia" NA NA NA 35.72 43.77 27.94 34.33 33.84 28.33 21.8 36.82 28.56 18.32 31.4 4.27 7.07 28.57 18.19 43.37 7.77 12.6 14.84 18.25 18.31 30.34 33.22 37.44 54.1 30.4 75 33.02 "Hawaii" NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 49.97 21.2 38.7 62.5 48.72 "Idaho" NA NA NA NA NA NA NA NA NA 44.87 21.3 47.14 65.84 54.09 31.02 41.13 66.02 47.12 64.74 38.23 33.19 45.31 48.07 47.26 65.42 61.18 53.78 49.1 56.8 64.2 61.77 "Illinois" 40.25 50.68 54.41 55.69 56.27 50.09 51.11 50.16 49.54 45.7 55.65 52.83 58.77 54.52 22.13 52.56 67.81 58.84 56.93 42.04 39.69 48.54 48.05 49.24 54.84 59.52 49.8 40.5 47.1 59 51.11 "Indiana" 40.03 51.09 53.6 51.39 53 48.27 49.33 48.16 49.05 46.17 50.81 50.6 53.99 48.4 23.11 47.44 55.14 55.25 59.68 42.94 41.89 50.45 52.38 49.58 58.11 59.9 55.03 44 50.3 66.1 53.77 "Iowa" 49.13 54.87 64.23 61.92 64.18 58.58 56.85 52.42 52.36 49.6 55.46 57.99 63.37 55.62 24.3 54.06 70.91 55.06 61.8 39.98 42.7 52.03 51.99 47.58 63.76 59.06 56.71 38.1 53 57.6 50.51 "Kansas" NA NA 78.61 68.89 66.64 63.1 60.4 58.18 55.39 48.4 47.46 52.56 64.81 52.46 20.48 44.09 64.76 61.54 72.02 44.13 45.95 56.86 60.25 53.63 68.77 65.44 60.45 45.9 54.8 67.7 53.91 "Kentucky" 0.26 0.93 30.17 25.45 46.45 37.61 39.8 42.81 44.99 39.74 48.92 48.5 47.11 48.04 25.46 46.52 49.25 48.93 59.33 40.15 39.92 42.3 45.22 41.48 49.84 54.3 53.59 36 43.8 63.4 46.24 "Louisiana" NA NA NA 29.31 55.69 51.57 37.1 42.39 26.48 23.59 21.79 20.96 9.65 11.92 4.83 6.95 
30.57 20.23 23.71 7.03 11.16 14.09 19.4 17.45 47.08 53.28 28.59 56.8 23.5 65.3 47 "Maine" 61.37 64.15 60.22 62.42 67.86 56.73 51.45 55.35 57.49 54.06 67.9 61.89 67.1 63 20.48 50.95 68.92 72.03 68.63 55.83 55.49 51.1 52.44 56.74 66.05 70.87 57.05 31.2 43.1 61.5 50.34 "Maryland" 0.32 3.11 55.1 32.8 49.65 43.94 45.37 46.09 47.4 43.48 54.6 51.5 48.83 48.85 23.69 44.78 55.11 45.29 57.06 36.04 37.04 40.83 48.15 49.4 55.36 60.04 46.39 34.5 41.9 61.3 46.87 "Massachusetts" 64.72 62.75 72.22 69.67 69.25 57.74 58.5 48.32 53.38 51.85 69.36 57.67 57.92 58.21 31.89 50.54 68.55 62.26 49.15 46.64 41.76 46.36 47 43.17 54.22 59.33 39.55 23.8 32.9 45.2 41.93 "Michigan" 56.98 57.18 55.89 56.98 62.67 52.45 52.54 47.89 49.73 47.81 53.85 58.1 69.5 61.93 27.63 52.09 72.8 75.37 70.36 44.45 38.76 49.85 49.18 49.23 55.44 55.63 48.84 33.3 41.5 56.2 52.68 "Minnesota" NA 63.42 59.06 60.8 61.55 58.77 62.28 58.8 54.12 45.96 56.62 60.21 73.95 59.11 19.25 46.35 70.78 51.18 57.77 36.29 31.01 47.66 46.86 39.89 55.33 53.68 49.16 36.2 41.5 51.6 44.3 "Mississippi" NA NA NA NA 63.47 31.92 29.94 36.25 26.63 2.64 7.27 9.73 5.46 6.56 2.47 4.91 14.03 7.6 17.9 3.55 2.74 4.19 6.44 2.62 39.56 24.46 24.67 87.1 13.5 78.2 49.21 "Missouri" NA 10.29 70.17 58.9 43.65 41.23 38.65 46.01 45.31 41.97 45.24 45.94 49.92 48.5 29.75 46.94 54.57 49.58 55.58 35.08 38.16 47.5 48.43 41.5 50.71 49.87 49.74 36 44.9 62.2 48.22 "Montana" NA NA NA NA NA NA NA NA NA 42.54 19.72 39.84 54.21 46.98 23.19 37.57 61.13 42.5 58.37 36.07 27.59 40.17 44.93 43.15 59.39 57.13 51.1 41.1 50.6 57.9 53.65 "Nebraska" NA NA NA 64.14 70.12 59.65 62.87 57.33 53.51 43.57 45.98 50.46 61.38 47.6 21.73 41.08 64.68 47.09 63.19 35.29 40.74 57.19 58.58 54.15 69.15 65.51 62.07 47.4 59.8 70.5 60.31 "Nevada" NA NA 59.84 55.39 57.43 52.73 47.6 56.21 57.23 25.84 18.79 37.75 56.66 43.93 15.89 36.4 56.92 41.76 56.54 30.59 27.19 39.92 45.38 47.26 61.45 57.97 48.84 41.4 47.5 63.7 52.27 "New Hampshire" 53.59 56.89 52.56 55.02 53.95 51.84 51.94 51.15 50.35 51.11 68.74 
59.34 60.14 59.32 37.43 49.06 59.84 59.83 58.66 50.42 47.98 46.78 47.87 52.41 60.92 66.11 53.42 36.1 52.1 64 55.68 "New Jersey" 28.52 48.13 47.16 49.12 54.22 47 49.02 47.31 47.52 46.24 59.66 55.27 56.68 56.79 20.54 54.35 67.6 62.16 59.77 47.59 39.57 47.93 48.95 50.33 56.81 64.68 49.16 34.4 46.1 61.6 50.99 "New Mexico" NA NA NA NA NA NA NA NA NA NA NA NA NA NA 35.91 46.53 54.68 48.52 59.01 35.76 36.5 43.28 46.44 42.93 55.39 57.81 49.41 41 51.8 61 51.04 "New York" 46.14 53.71 50.46 49.41 53.12 48.07 50.32 48.15 49.2 45.59 57.55 53.1 53.13 53.11 28.68 51.53 64.56 55.76 49.79 41.33 38.97 48.04 47.3 45.99 55.45 61.2 47.27 31.4 44.3 58.5 47.84 "North Carolina" NA NA NA 53.37 57.48 46.36 48.04 46.58 47.14 35.79 46.87 45.47 39.7 45.55 11.94 41.73 43.22 39.72 54.87 29.28 26.6 25.97 33.29 32.68 46.09 49.34 47.89 43.8 39.5 69.5 44.43 "North Dakota" NA NA NA NA NA NA NA NA NA 48.5 55.58 62.11 74.83 60.87 26.67 49.2 77.79 47.68 54.8 28 26.58 55.06 53.84 52.17 70.97 61.72 55.42 42 55.9 62.1 52.93 "Ohio" 48.49 52.33 56.33 53.97 53.24 50.21 51.73 50.99 49.51 47.66 51.86 52.3 59.73 51.03 26.82 44.18 58.47 58.33 64.89 47.04 37.43 47.8 50.18 49.24 56.76 61.11 53.28 37.1 45.2 59.6 49.9 "Oklahoma" NA NA NA NA NA NA NA NA NA NA NA NA NA 43.03 35.69 33.21 50.11 42.82 63.72 26.7 32.69 42.23 44.2 37.25 54.59 55.13 59.02 44.3 47.7 73.7 50.52 "Oregon" NA 34.48 53.9 49.63 58.74 50.91 50.51 50.99 53.82 44.59 50.07 55.25 67.06 56.39 25.3 48.47 60.2 51.01 64.18 36.89 29.64 45.63 46.94 49.78 60.54 55.25 52.62 36.3 49.8 52.4 50.01 "Pennsylvania" 33.95 56.25 51.75 52.2 62.18 50.61 50.84 52.68 52.73 51.45 60.98 60.74 67.99 58.84 22.4 54.25 65.8 65.35 65.24 50.84 40.85 46.34 48.36 50.93 52.74 56.49 48.74 34.5 44 59.1 48.57 "Rhode Island" 57.85 61.22 62.2 66.49 71.94 59.29 62.25 58.07 53.89 50.71 68.33 59.74 60.6 60.76 35.57 51.08 63.97 59.63 49.55 43.31 40.18 43.24 41.26 41.44 50.89 58.31 36.37 19.1 31.8 53 44.24 "South Carolina" NA NA NA 57.93 75.95 50.26 33.97 23.72 17.27 18.99 13.51 7.04 4.63 5.97 
1.06 2.43 3.9 2.21 8.54 1.89 1.43 4.37 4.46 3.78 49.28 25.18 48.76 58.9 38.1 70.8 43.54 "South Dakota" NA NA NA NA NA NA NA NA NA 49.48 49.48 56.73 71.09 58.84 NA 49.8 60.74 49.69 60.18 34.4 42.49 57.41 58.33 51.84 69.27 58.39 58.21 44.4 53.3 54.2 50.92 "Tennessee" NA NA NA 68.33 47.57 40.21 44.53 47.83 45.85 37.51 46.23 44.93 43.4 45.95 23.84 42.7 51.28 43.59 53.76 32.46 30.81 32.36 39.22 36.87 49.98 49.21 52.92 44.5 37.8 67.7 43.21 "Texas" NA NA NA NA 40.71 29.96 23.95 28.63 24.73 19.28 30.75 30.83 21.9 22.35 8.77 17.45 23.54 19.78 51.77 11.35 12.31 19.13 16.64 24.6 53.13 55.27 48.52 36.7 39.9 66.2 48.01 "Utah" NA NA NA NA NA NA NA NA NA NA 17.27 50.59 61.45 56.19 37.46 37.82 55.93 49.26 53.58 41.05 29.79 37.59 39.42 45.52 58.93 64.56 54.81 45.3 56.5 67.6 64.94 "Vermont" 78.23 75.79 76.1 78.57 78.26 68.58 69.88 66.54 71.24 68.1 80.1 75.79 77.98 75.12 37.13 62.44 75.87 78.22 66.88 57.66 56.44 54.79 57.06 61.55 71.46 72.18 58.65 33.7 52.8 62.7 56.01 "Virginia" 0.19 1.15 NA NA 50.48 40.62 39.52 48.9 49.47 38.75 45.9 43.81 36.67 38.36 17 32.05 37.85 32.79 53.91 30.09 29.39 31.55 37.39 41.04 56.32 55.37 52.44 46.5 41.4 67.8 50.73 "Washington" NA NA NA NA NA NA NA NA NA 41.45 41.84 53.44 69.95 57.47 21.82 43.89 55.96 52.24 67.06 33.94 29.88 40.58 42.24 42.68 54.33 53.91 50.68 38 45.1 56.9 51.37 "West Virginia" NA NA 68.95 58.84 51.82 42.47 41.03 47.74 49.03 46.94 52.23 54.27 55.26 53.41 21.1 49.38 55.3 49.45 58.43 44.47 39.2 42.9 45.11 42.24 48.08 54.08 47.27 32.1 40.8 63.6 41.95 "Wisconsin" 55.29 56.58 55.88 56.24 54.6 50.9 54.04 50.38 49.77 46.02 59.93 60.04 63.24 54.52 32.68 49.25 70.65 37.06 53.52 31.19 30.26 48.32 50.37 46.28 60.95 61.58 51.77 37.9 47.9 53.4 49.16 "Wyoming" NA NA NA NA NA NA NA NA NA NA NA NA NA NA 34.42 41.86 62.38 52.39 63.68 40.82 37.47 46.89 51.23 47.27 62.71 60.04 55.05 43.4 55.8 69 59.85 cluster/PORTING0000644000175100001440000000503107302543047013021 0ustar hornikusers* R/mona.q: Replace if(!is.matrix(x)) stop(message = "x is not a 
matrix.") by if(!is.matrix(x) && !is.data.frame(x)) stop("x must be a matrix or data frame.") Comment x2 <- paste(x2, collapse = "") and replace storage.mode(x2) <- "character" by storage.mode(x2) <- "integer" * R/plothier.q: Replace `pick <- 2' by `pick <- 3'. (Undo when plclust is available.) -- undone for version 1.3-2 Replace invisible(return(x)) by return(invisible(x)) * R/plotpart.q: Replace invisible(return(x)) by return(invisible(x)) Replace `pick <- 2' by `pick <- 4'. (Undo when clusplots really work.) In code for clusplot.default(), remove cmd() and replace x1 <- cmd(x, k = 2, eig = T, add = T) if(x1$ac < 0) x1 <- cmd(x, k = 2, eig = T) by x1 <- cmdscale(x, k = 2, eig = T) (Unfix when we have the `add' argument to cmdscale().) Replace `rep.int' by `rep'. Replace `text.default' by `text'. * R/zzz.R: Add .First.lib <- function(lib, pkg) { require(mva) library.dynam("cluster", pkg, lib) assign("plclust", .Alias(plot.hclust), pos = "package:cluster") } * src/mona.f: Replace CHARACTER KX(NN,JPP),NZF by INTEGER KX(NN,JPP),NZF Change all '0' to 0. Change all '1' to 1. * R/daisy.q: * src/daisy.f: (BDR) Rewrite to pass integers rather than C character strings to Fortran. ************************************************************************ The directory `man' contains R documentation sources converted via `Sd2Rd -x' from the S documentation sources. (In earlier versions, it was helpful to run `.CONV/FIXME.pl' before converting.) * man/fanny.Rd: Replace the displayed equation by \deqn{\sum_{v=1}^k \frac{\sum_{i=1}^n\sum_{j=1}^n u_{iv}^2 u_{jv}^2 d(i,j)}{ 2 \sum_{j=1}^n u_{jv}^2}}{ SUM_v (SUM_(i,j) u(i,v)^2 u(j,v)^2 d(i,j)) / (2 SUM_j u(j,v)^2)} All examples hand-edited! ************************************************************************ ============== Martin Maechler (many things are in Changelog!) 
=========== src/clara.f : ~~~~~~~~~~~ *) to lowercase only : tr A-Z a-z < clara.f.~1~ > clara.f.~2~ 1) to lowercase and change initial comments to 'c' (because of Emacs' indentation): tr A-Z a-z < clara.f.~1~ | sed '/^cc/s//c/'> clara.f.~2~ 2) Inside Emacs of clara.f.~2~ C-x C-w (write-file "clara.f.~3~") Repeat about 6 times M-C-q (fortran-indent-subprogram) M-C-e (end-of-fortran-subprogram) M-> (end-of-buffer) C-x C-o (delete-blank-lines) C-x h (mark-whole-buffer) M-x tabify (tabify (point-min) (point-max)) cluster/R/0000755000175100001440000000000012553131040012144 5ustar hornikuserscluster/R/clusGap.R0000644000175100001440000001372512470407277013715 0ustar hornikusers#### Originally from orphaned package SLmisc #### (Version: 1.4.1, 2007-04-12, Maintainer: Matthias Kohl ) #### License: GPL (version 2 or later) #### #### which said #### "function corresponds to function gap in package SAGx" ## MM: SAGx is now in Bioconductor --- 1.10.1{devel} or 1.11.1{release} ## had gap() *corrected* to re-cluster using FUNcluster --> see ./gap-SAGx.R.~orig~ ## ## MM: Package 'lga' -- has gap() and lga and robust lga [-> UBC] ## - it uses boot() nicely [2012-01: ORPHANED because Justin Harrington is amiss] ## MM: renamed arguments, and changed almost everything clusGap <- function (x, FUNcluster, K.max, B = 100, verbose = interactive(), ...) { stopifnot(is.function(FUNcluster), length(dim(x)) == 2, K.max >= 2, (n <- nrow(x)) >= 1, ncol(x) >= 1) if(B != (B. <- as.integer(B)) || (B <- B.) <= 0) stop("'B' has to be a positive integer") if(is.data.frame(x)) x <- as.matrix(x) ii <- seq_len(n) W.k <- function(X, kk) { clus <- if(kk > 1) FUNcluster(X, kk, ...)$cluster else rep.int(1L, nrow(X)) ## ---------- = = -------- kmeans() has 'cluster'; pam() 'clustering' 0.5* sum(vapply(split(ii, clus), function(I) { xs <- X[I,, drop=FALSE] sum(dist(xs)/nrow(xs)) }, 0.)) } logW <- E.logW <- SE.sim <- numeric(K.max) if(verbose) cat("Clustering k = 1,2,..., K.max (= ",K.max,"): .. 
", sep='') for(k in 1:K.max) logW[k] <- log(W.k(x, k)) if(verbose) cat("done\n") ## Scale 'x' into "hypercube" -- we later fill with H0-generated data xs <- scale(x, center=TRUE, scale=FALSE) m.x <- rep(attr(xs,"scaled:center"), each = n)# for back transforming V.sx <- svd(xs)$v rng.x1 <- apply(xs %*% V.sx, # = transformed(x) 2, range) logWks <- matrix(0., B, K.max) if(verbose) cat("Bootstrapping, b = 1,2,..., B (= ", B, ") [one \".\" per sample]:\n", sep="") for (b in 1:B) { ## Generate "H0"-data as "parametric bootstrap sample" : z1 <- apply(rng.x1, 2, function(M, nn) runif(nn, min=M[1], max=M[2]), nn=n) z <- tcrossprod(z1, V.sx) + m.x # back transformed for(k in 1:K.max) { logWks[b,k] <- log(W.k(z, k)) } if(verbose) cat(".", if(b %% 50 == 0) paste(b,"\n")) } if(verbose && (B %% 50 != 0)) cat("",B,"\n") E.logW <- colMeans(logWks) SE.sim <- sqrt((1 + 1/B) * apply(logWks, 2, var)) structure(class = "clusGap", list(Tab = cbind(logW, E.logW, gap = E.logW - logW, SE.sim), ## K.max == nrow(T) n = n, B = B, FUNcluster=FUNcluster)) } ## lga/R/gap.R --- has for Tibshirani et al (2001): ## ElogWks[k,] <- c(mean(BootOutput), sqrt(var(BootOutput)*(1+1/B))) ## GAP[k] <- ElogWks[k,1] - logWks[k] ## if (k > 1) ## if(GAP[k-1] >= GAP[k]-ElogWks[k,2] & !doall) ## finished <- TRUE ## so they effectively only look for the *first* (local) maximum which .. ## MM: <==> diff(GAP) = GAP[k] - GAP[k-1] <= +SE.sim[k] ## criteria.DandF() -- Dudoit and Fridlyand (2002) ## ---------------- looks at the *global* maximum and then to the left.. 
## y <- x$data ## crit <- diff(y[which.max(y[,"Gap"]), c("Sks", "Gap")]) ## nclust <- min(which(y[,"Gap"] > crit)) ## return(ifelse(nclust == nrow(y), NA, nclust)) maxSE <- function(f, SE.f, method = c("firstSEmax", "Tibs2001SEmax", "globalSEmax", "firstmax", "globalmax"), SE.factor = 1) { method <- match.arg(method) stopifnot((K <- length(f)) >= 1, K == length(SE.f), SE.f >= 0, SE.factor >= 0) fSE <- SE.factor * SE.f switch(method, "firstmax" = { ## the first local maximum (== firstSEmax with SE.factor == 0) decr <- diff(f) <= 0 # length K-1 if(any(decr)) which.max(decr) else K # the first TRUE, or K }, "globalmax" = { which.max(f) }, "Tibs2001SEmax" = { ## The one Tibshirani et al (2001) proposed: ## "the smallest k such that f(k) >= f(k+1) - s_{k+1}" g.s <- f - fSE if(any(mp <- f[-K] >= g.s[-1])) which.max(mp) else K }, "firstSEmax" = { ## M.Maechler(2012): rather .. ## look at the first *local* maximum and then to the left ..: decr <- diff(f) <= 0 # length K-1 nc <- if(any(decr)) which.max(decr) else K # the first TRUE, or K if(any(mp <- f[seq_len(nc - 1)] >= f[nc] - fSE[nc])) which(mp)[1] else nc }, "globalSEmax" = { ## Dudoit and Fridlyand (2002) *thought* Tibshirani proposed.. ## in 'lga', see criteria.DandF(): ## looks at the *global* maximum and then to the left.. nc <- which.max(f) if(any(mp <- f[seq_len(nc - 1)] >= f[nc] - fSE[nc])) which(mp)[1] else nc }) } print.clusGap <- function(x, method="firstSEmax", SE.factor = 1, ...) { method <- match.arg(method, choices = eval(formals(maxSE)$method)) stopifnot((K <- nrow(T <- x$Tab)) >= 1, SE.factor >= 0) cat("Clustering Gap statistic [\"clusGap\"].\n", sprintf("B=%d simulated reference sets, k = 1..%d\n",x$B, K), sep="") nc <- maxSE(f = T[,"gap"], SE.f = T[,"SE.sim"], method=method, SE.factor=SE.factor) cat(sprintf(" --> Number of clusters (method '%s'%s): %d\n", method, if(grepl("SE", method)) sprintf(", SE.factor=%g",SE.factor) else "", nc)) print(T, ...) 
invisible(x) } plot.clusGap <- function(x, type="b", xlab = "k", ylab = expression(Gap[k]), do.arrows=TRUE, arrowArgs = list(col="red3", length=1/16, angle=90, code=3), ...) { stopifnot(is.matrix(Tab <- x$Tab), is.numeric(Tab)) K <- nrow(Tab) k <- seq_len(K) # == 1,2,... k gap <- Tab[, "gap"] plot(k, gap, type=type, xlab=xlab, ylab=ylab, ...) if(do.arrows) do.call(arrows, c(list(k, gap+ Tab[, "SE.sim"], k, gap- Tab[, "SE.sim"]), arrowArgs)) invisible() } cluster/R/fanny.q0000644000175100001440000002110512540507660013453 0ustar hornikusers#### $Id: fanny.q 6953 2015-06-18 09:30:24Z maechler $ fanny <- function(x, k, diss = inherits(x, "dist"), memb.exp = 2, metric = c("euclidean", "manhattan", "SqEuclidean"), stand = FALSE, iniMem.p = NULL, cluster.only = FALSE, keep.diss = !diss && !cluster.only && n < 100, keep.data = !diss && !cluster.only, maxit = 500, tol = 1e-15, trace.lev = 0) { if((diss <- as.logical(diss))) { ## check type of input vector if(anyNA(x)) stop("NA values in the dissimilarity matrix not allowed.") if(data.class(x) != "dissimilarity") { # try to convert to if(!is.null(dim(x))) { x <- as.dist(x) # or give an error } else { ## possibly convert input *vector* if(!is.numeric(x) || is.na(n <- sizeDiss(x))) stop("'x' is not and cannot be converted to class \"dissimilarity\"") attr(x, "Size") <- n } class(x) <- dissiCl if(is.null(attr(x,"Metric"))) attr(x, "Metric") <- "unspecified" } ## prepare arguments for the Fortran call n <- attr(x, "Size") dv <- as.double(c(x, 0))# add extra one jp <- 1 mdata <- FALSE ndyst <- 0L x2 <- double(n) jdyss <- 1 } else { ## check input matrix and standardize, if necessary x <- data.matrix(x) if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.") x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x metric <- match.arg(metric) ## put info about metric, size and NAs in arguments for the Fortran call ndyst <- which(metric == eval(formals()$metric))# 1, 2, or 3 n <- nrow(x2) jp <- ncol(x2) if((mdata 
<- any(inax <- is.na(x2)))) { # TRUE if x[] has any NAs jtmd <- as.integer(ifelse(apply(inax, 2, any), -1, 1)) ## VALue for MISsing DATa valmisdat <- 1.1* max(abs(range(x2, na.rm=TRUE))) x2[inax] <- valmisdat } dv <- double(1 + (n * (n - 1))/2) jdyss <- 0 } if((k <- as.integer(k)) < 1 || k > n%/%2 - 1) stop("'k' (number of clusters) must be in {1,2, .., n/2 -1}") if(length(memb.exp) != 1 || (memb.exp <- as.double(memb.exp)) < 1 || memb.exp == Inf) stop("'memb.exp' must be a finite number > 1") if((maxit <- as.integer(maxit)[1]) < 0) stop("'maxit' must be non-negative integer") computeP <- is.null(iniMem.p) # default: determine initial membership in C if(computeP)# default: determine initial membership in C iniMem.p <- matrix(0., n, k)# all 0 -> will be used as 'code' else { dm <- dim(iniMem.p) if(length(dm) !=2 || !all(dm == c(n,k)) || !is.numeric(iniMem.p) || any(iniMem.p < 0) || !isTRUE(all.equal(unname(rowSums(iniMem.p)), rep(1, n)))) stop("'iniMem.p' must be a nonnegative n * k matrix with rowSums == 1") if(!is.double(iniMem.p)) storage.mode(iniMem.p) <- "double" } stopifnot(length(cluster.only) == 1) stopifnot(length(trace.lev) == 1) ## call Fortran routine storage.mode(x2) <- "double" res <- .C(cl_fanny, as.integer(n), as.integer(jp), k, x2, dis = dv, ok = as.integer(jdyss), if(mdata) rep(valmisdat, jp) else double(1), if(mdata) jtmd else integer(jp), ndyst, integer(n), # nsend integer(n), # nelem integer(n), # negbr double(n), # syl p = iniMem.p, dp = matrix(0., n, k),# < must all be 0 on entry! avsil = double(k),# 'pt' integer(k), # nfuzz double(k), # esp double(k), # ef double(n), # dvec ttsil = as.double(0), obj = as.double(c(cluster.only, trace.lev, computeP, 0)),# in & out! clu = integer(n), silinf = if(cluster.only) 0. 
else matrix(0., n, 4), memb.exp = memb.exp,# = 'r' tol = as.double(tol), maxit = maxit) if(!(converged <- res$maxit > 0)) { warning(gettextf( "FANNY algorithm has not converged in 'maxit' = %d iterations", maxit)) } if(!cluster.only) sildim <- res$silinf[, 4] if(diss) { if(keep.diss) disv <- x labs <- attr(x, "Labels") } else { ## give warning if some dissimilarities are missing. if(res$ok == -1) stop("No clustering performed, NA-values in the dissimilarity matrix.") labs <- dimnames(x)[[1]] if(keep.diss) { disv <- res$dis[ - (1 + (n * (n - 1))/2)] # drop the extra one disv[disv == -1] <- NA class(disv) <- dissiCl attr(disv, "Size") <- nrow(x) attr(disv, "Metric") <- metric attr(disv, "Labels") <- labs } } ## add labels, dimnames, etc to Fortran output: if(length(labs) != 0) { if(!cluster.only) sildim <- labs[sildim] dimnames(res$p) <- list(labs, NULL) names(res$clu) <- labs } coeff <- if(memb.exp == 2) res$obj[3:4] else { ## usual partition coefficient with " ^ 2 " : cf <- sum(res$p ^ 2) / n c(cf, (k * cf - 1)/(k - 1)) } names(coeff) <- c("dunn_coeff", "normalized") if(abs(coeff["normalized"]) < 1e-7) warning("the memberships are all very close to 1/k. 
Maybe decrease 'memb.exp' ?") k.crisp <- res$obj[1] res$obj <- c("objective" = res$obj[2]) r <- list(membership = res$p, coeff = coeff, memb.exp = memb.exp, clustering = res$clu, k.crisp = k.crisp, # 'obj*': also containing iterations for back compatibility: objective = c(res$obj, "tolerance" = res$tol), convergence = c(iterations = res$maxit, converged = converged, maxit = maxit), diss = if(keep.diss) disv, call = match.call()) if(k != 1 && !cluster.only) { dimnames(res$silinf) <- list(sildim, c("cluster", "neighbor", "sil_width", "")) r$silinfo <- list(widths = res$silinf[, -4], clus.avg.widths = res$avsil[1:k], avg.width = res$ttsil) } if(keep.data && !diss) { if(mdata) x2[x2 == valmisdat] <- NA r$data <- x2 } class(r) <- c("fanny", "partition") r } ## non-exported: .print.fanny <- function(x, digits = getOption("digits"), ...) { cat("Fuzzy Clustering object of class 'fanny' :") print(formatC(cbind(" " = c("m.ship.expon." = x$memb.exp, x$objective[c("objective", "tolerance")], x$convergence, "n" = nrow(x$membership))), digits = digits), quote = FALSE, ...) k <- ncol(x$membership) cat("Membership coefficients (in %, rounded):\n"); print(round(100 * x$membership), ...) cat("Fuzzyness coefficients:\n"); print(x$coeff, digits = digits, ...) cat("Closest hard clustering:\n"); print(x$clustering, ...) if(x$k.crisp < k) cat(sprintf("k_crisp (= %d) < k !!\n", x$k.crisp)) } print.fanny <- function(x, digits = getOption("digits"), ...) { .print.fanny(x, digits = digits, ...) cat("\nAvailable components:\n") print(names(x), ...) invisible(x) } summary.fanny <- function(object, ...) { class(object) <- "summary.fanny" object } print.summary.fanny <- function(x, digits = getOption("digits"), ...) { .print.fanny(x, digits = digits, ...) if(length(x$silinfo) != 0) { cat("\nSilhouette plot information:\n") print(x$silinfo[[1]], ...) cat("Average silhouette width per cluster:\n") print(x$silinfo[[2]], ...) 
cat("Average silhouette width of total data set:\n") print(x$silinfo[[3]], ...) } if(!is.null(x$diss)) { ## Dissimilarities: cat("\n"); print(summary(x$diss, ...)) } cat("\nAvailable components:\n"); print(names(x), ...) invisible(x) } ## FIXME: Export and document these! ----------------------- ## Convert crisp clustering vector to fuzzy membership matrix as.membership <- function(clustering, keep.names = TRUE) { stopifnot(is.numeric(clustering), clustering == round(clustering)) n <- length(clustering) k <- length(u <- sort(unique(clustering))) r <- matrix(0L, n, k) if(k == 0 || n == 0) return(r) if(keep.names) dimnames(r) <- list(names(clustering), NULL) if(any(u != 1:k)) clustering <- match(clustering, u) r[cbind(1:n, clustering)] <- 1L r } ## "Generalized Inverse" transformation: ## Convert fuzzy membership matrix to closest crisp clustering vector toCrisp <- function(m) { dm <- dim(m) if(length(dm) != 2 || !is.numeric(m) || any(m < 0) || !isTRUE(all.equal(unname(rowSums(m)), rep(1, dm[1])))) stop("'m', a membership matrix, must be nonnegative with rowSums == 1") apply(m, 1, which.max) } cluster/R/0aaa.R0000644000175100001440000000114412540507660013104 0ustar hornikusers## Ensure consistent "diss.." class --- make "namespace-private-global ! dissiCl <- c("dissimilarity", "dist") if((Rv <- getRversion()) < "3.1.0") { anyNA <- function(x) any(is.na(x)) if(Rv < "3.0.0") { rep_len <- function(x, length.out) rep(x, length.out=length.out) ## if(Rv < "2.15") ## paste0 <- function(...) paste(..., sep = '') } }; rm(Rv) ## Not exported, and only used because CRAN checks must be faster doExtras <- function() { interactive() || nzchar(Sys.getenv("R_CLUSTER_CHECK_EXTRA")) || identical("true", unname(Sys.getenv("R_PKG_CHECKING_doExtras"))) } cluster/R/plothier.q0000644000175100001440000001547612402021401014161 0ustar hornikusers### $Id: plothier.q 6800 2014-09-04 08:29:53Z maechler $ pltree <- function(x, ...) UseMethod("pltree") ## note: pltree() can have an 'xlab' in "..." 
(plot.hclust has an explicit one) pltree.twins <- function(x, main = paste("Dendrogram of ", deparse(x$call)), labels = NULL, ylab = "Height", ...) { plot(as.hclust(x), labels = labels, ##- if(is.null(labels) && length(x$order.lab) != 0) ##- labels <- x$order.lab[sort.list(x$order)] ##- ##- ## calling plot.hclust() via generic : ##- plot(structure(list(merge = x$merge, order = x$order, ##- height = sort(x$height), labels = labels, ##- call = x$call, method = x$method), ##- class = "hclust"), main = main, ylab = ylab, ...) } bannerplot <- function(x, w = rev(x$height), fromLeft = TRUE, main=NULL, sub=NULL, xlab = "Height", adj = 0, col = c(2, 0), border = 0, axes = TRUE, frame.plot = axes, rev.xax = !fromLeft, xax.pretty = TRUE, labels = NULL, nmax.lab = 35, max.strlen = 5, yax.do = axes && length(x$order) <= nmax.lab, yaxRight = fromLeft, y.mar = 2.4 + max.strlen / 2.5, ...) { m <- max(w) if(axes) { if(xax.pretty) { at.vals <- if(!is.logical(xax.pretty)) pretty(c(0,w), n = xax.pretty) else pretty(c(0,w)) n <- length(at.vals <- at.vals[at.vals <= m]) if(at.vals[n] * 1.01 < m) { lab.vals <- c(at.vals, signif(m, 3)) at.vals <- c(at.vals, m) } else lab.vals <- at.vals } else { # old default for plot.agnes() and plot.diana() ss <- seq(0, floor(m), length = 11)# => intervals = 1/10 {total} at.vals <- c(ss, m) lab.vals <- round(at.vals, 2) } } if(fromLeft) { w <- rbind(w, m - w) if(missing(col)) col <- rev(col) } else { ## from Right w <- rbind(m - w, w) if(axes && rev.xax) { at.vals <- m - rev(at.vals)## == c(0, ss + m - floor(m)) lab.vals <- rev(lab.vals) } } if(yax.do) { ax <- if(yaxRight) list(side = 4, pos = m) else list(side = 2, pos = 0) if((pm <- par("mar"))[ax$side] < y.mar) { ## need space besides y axis for labeling pm[ax$side] <- y.mar op <- par(mar = pm) on.exit(par(op)) } } barplot(w, xlab = xlab, horiz = TRUE, space = 0, axes = FALSE, col = col, border = border, mgp = c(2.5, 1, 0), ...) 
if(frame.plot && (border == 0 || border == par("bg"))) rect(0, 0, m, ncol(w)) title(main = main, sub = sub, adj = adj) if(axes) { axis(1, at = at.vals, labels = lab.vals, ...) if(yax.do) { if(is.null(labels)) labels <- rev(if (length(x$order.lab) != 0) substring(x$order.lab, 1,max.strlen) else x$order) axis(ax$side, at = 0:(length(x$order) - 1), las = 1, labels = labels, pos = ax$pos, mgp = c(3, 1.25, 0), ...) } } invisible() } ## plot.diana() [further down] & plot.agnes() are almost identical; ## -- made bannerplot() a stand-alone function ## --> maybe *merge* these two into one plot.twins() plot.agnes <- function(x, ask = FALSE, which.plots = NULL, main = NULL, sub = paste("Agglomerative Coefficient = ", round(x$ac, digits = 2)), adj = 0, nmax.lab = 35, max.strlen = 5, xax.pretty = TRUE, ...) { if(is.null(main)) { cl <- paste(strwrap(deparse(x$call, 150)[1], width = 60, exdent = 7), collapse="\n") ## Different default for banner & pltree: main1 <- paste("Banner of ", cl) main2 <- paste("Dendrogram of ", cl) } else { # same title for both main1 <- main2 <- main } if(is.null(which.plots) && !ask) which.plots <- 1:2 if(ask && is.null(which.plots)) { ## Use 'menu' .. tmenu <- paste("plot ", ## choices : c("All", "Banner", "Clustering Tree")) do.all <- FALSE repeat { if(!do.all) pick <- menu(tmenu, title = "\nMake a plot selection (or 0 to exit):\n") + 1 switch(pick, return(invisible()), # 0 -> exit loop do.all <- TRUE,# 1 : All bannerplot(x, fromLeft = TRUE, main = main1, sub = sub, adj = adj, xax.pretty = 10, nmax.lab= nmax.lab, max.strlen= max.strlen, ...), pltree (x, main = main2, sub = sub, ...) 
# 3 ) if(do.all) { pick <- pick + 1; do.all <- pick <= length(tmenu) + 1} } } else { ask <- prod(par("mfcol")) < length(which.plots) && dev.interactive() if(ask) { op <- par(ask = TRUE) on.exit(par(op)) } for(i in which.plots) switch(i, bannerplot(x, fromLeft = TRUE, main = main1, sub = sub, adj = adj, xax.pretty = 10, nmax.lab = nmax.lab, max.strlen = max.strlen, ...), pltree (x, main = main2, sub = sub, ...) ) } invisible() } plot.diana <- function(x, ask = FALSE, which.plots = NULL, main = NULL, sub = paste("Divisive Coefficient = ", round(x$dc, digits = 2)), adj = 0, nmax.lab = 35, max.strlen = 5, xax.pretty = TRUE, ...) { if(is.null(main)) { cl <- paste(strwrap(deparse(x$call, 150)[1], width = 60, exdent = 7), collapse="\n") ## Different default for banner & pltree: main1 <- paste("Banner of ", cl) main2 <- paste("Dendrogram of ", cl) } else { # same title for both main1 <- main2 <- main } if(is.null(which.plots) && !ask) which.plots <- 1:2 if(ask && is.null(which.plots)) { ## Use 'menu' .. tmenu <- paste("plot ", ## choices : c("All", "Banner", "Clustering Tree")) do.all <- FALSE repeat { if(!do.all) pick <- menu(tmenu, title = "\nMake a plot selection (or 0 to exit):\n") + 1 switch(pick, return(invisible()), # 0 -> exit loop do.all <- TRUE,# 1 : All bannerplot(x, fromLeft = FALSE, main = main1, sub = sub, adj = adj, xax.pretty = 10, nmax.lab= nmax.lab, max.strlen= max.strlen, ...), pltree (x, main = main2, sub = sub, ...) ) if(do.all) { pick <- pick + 1; do.all <- pick <= length(tmenu) + 1} } } else { ask <- prod(par("mfcol")) < length(which.plots) && dev.interactive() if(ask) { op <- par(ask = TRUE) on.exit(par(op)) } for(i in which.plots) switch(i, bannerplot(x, fromLeft = FALSE, main = main1, sub = sub, adj = adj, xax.pretty = 10, nmax.lab = nmax.lab, max.strlen = max.strlen, ...),# 1 pltree (x, main = main2, sub = sub, ...) 
# i = 2 ) } invisible() } plot.mona <- function(x, main = paste("Banner of ", deparse(x$call)), sub = NULL, xlab = "Separation step", col = c(2,0), axes = TRUE, adj = 0, nmax.lab = 35, max.strlen = 5, ...) { w <- rev(x$step) m <- max(w) if(any(i0 <- w == 0)) w[i0] <- m <- m+1 bannerplot(x[c("order","order.lab")], w = w, fromLeft = TRUE, yaxRight = FALSE, col = col, main = main, sub = sub, xlab = xlab, adj= adj, axes= axes, nmax.lab= nmax.lab, max.strlen= max.strlen, xax.pretty = m+1, ...) names <- paste(" ", rev(x$variable)) is.na(names) <- i0 text(w, 1:length(names) - 0.5, names, adj = 0, col = col[1], ...) } cluster/R/plotpart.q0000644000175100001440000004000412540507627014207 0ustar hornikusers### $Id: plotpart.q 6952 2015-06-18 09:29:59Z maechler $ plot.partition <- function(x, ask = FALSE, which.plots = NULL, nmax.lab = 40, max.strlen = 5, data = x$data, dist = NULL, stand = FALSE, lines = 2, shade = FALSE, color = FALSE, labels = 0, plotchar = TRUE, span = TRUE, xlim = NULL, ylim = NULL, main = NULL, ...) { if(is.null(x$data))# data not kept x$data <- data if(is.null(x$data) && !is.null(dist)) x$diss <- dist if(is.null(which.plots) && !ask) which.plots <- { if(is.null(x$data) && (is.null(x$diss) || inherits(x, "clara"))) 2 ## no clusplot else 1:2 } if(ask && is.null(which.plots)) { ## Use 'menu' .. tmenu <- paste("plot ", ## choices : c("All", "Clusplot", "Silhouette Plot")) do.all <- FALSE repeat { if(!do.all) pick <- menu(tmenu, title = "\nMake a plot selection (or 0 to exit):\n") + 1 switch(pick, return(invisible())# 0 -> exit loop , do.all <- TRUE# 1 : All , clusplot(x, stand = stand, lines = lines, shade = shade, color = color, labels = labels, plotchar = plotchar, span = span, xlim = xlim, ylim = ylim, main = main, ...) 
, plot(silhouette(x), nmax.lab, max.strlen, main = main) ) if(do.all) { pick <- pick + 1; do.all <- pick <= length(tmenu) + 1} } invisible() } else { ask <- prod(par("mfcol")) < length(which.plots) && dev.interactive() if(ask) { op <- par(ask = TRUE); on.exit(par(op)) } for(i in which.plots) switch(i, clusplot(x, stand = stand, lines = lines, shade = shade, color = color, labels = labels, plotchar = plotchar, span = span, xlim = xlim, ylim = ylim, main = main, ...) , plot(silhouette(x), nmax.lab, max.strlen, main = main) ) ## and return() whatever *plot(..) returns } } clusplot <- function(x, ...) UseMethod("clusplot") ##' @title Make/Check the (n x 2) matrix needed for clusplot.default(): ##' @param x numeric matrix or dissimilarity matrix (-> clusplot.default()) ##' @param diss logical indicating if 'x' is dissimilarity matrix. In that case, ##' 'cmdscale()' is used, otherwise (typically) 'princomp()'. ##' @return a list with components ##' x1 : (n x 2) numeric matrix; ##' var.dec: a number (in [0,1]), the "variance explained" ##' labs : the point labels (possibly 1:n) ##' @author Martin Maechler mkCheckX <- function(x, diss) { if(diss) { if(anyNA(x)) stop("NA-values are not allowed in dist-like 'x'.") if(inherits(x, "dist")) { n <- attr(x, "Size") labs <- attr(x, "Labels") } else { # x (num.vector or square matrix) must be transformed into diss. siz <- sizeDiss(x) if(is.na(siz)) { if((n <- nrow(x)) != ncol(x)) stop("Distances must be result of dist or a square matrix.") if(all.equal(x, t(x)) != TRUE) stop("the square matrix is not symmetric.") labs <- dimnames(x)[[1]] } else { if(!is.vector(x)) { labs <- attr(x, "Labels") # possibly NULL x <- as.matrix(x) if((n <- nrow(x)) == ncol(x) && all.equal(x, t(x)) == TRUE) { labs <- dimnames(x)[[1]] } else { ## Hmm, when does this ever happen : ## numeric, not-dist, non-vector, not symmetric matrix ? warning(">>>>> funny case in clusplot.default() -- please report!\n") ## if(n != sizeDiss(x)) ... 
attr(x, "Size") <- siz <- sizeDiss(x) if(is.null(labs)) labs <- 1:siz } } else { attr(x, "Size") <- n <- siz } } } x1 <- cmdscale(x, k = 2, add = TRUE) if(x1$ac < 0) ## Rarely ! (FIXME: need and test example!) x1 <- cmdscale(x, k = 2, eig = TRUE)# FIXME: don't need 'eig' but $GOF var.dec <- x1$GOF[2] # always in [0,1] x1 <- x1$points } else { ## Not (diss) if(!is.matrix(x)) stop("x is not a data matrix") if(anyNA(x)) { y <- is.na(x) if(any(apply(y, 1, all))) stop("one or more objects contain only missing values") if(any(apply(y, 2, all))) stop("one or more variables contain only missing values") x <- apply(x, 2, function(x) { x[is.na(x)] <- median(x, na.rm = TRUE); x } ) message("Missing values were displaced by the median of the corresponding variable(s)") } n <- nrow(x) labs <- dimnames(x)[[1]] x1 <- if(ncol(x) == 1) { hulp <- rep(0, length(x)) var.dec <- 1 matrix(c(t(x), hulp), ncol = 2) } else { prim.pr <- princomp(x, scores = TRUE, cor = ncol(x) != 2) var.dec <- cumsum(prim.pr$sdev^2/sum(prim.pr$ sdev^2))[2] prim.pr$scores[, 1:2] } } list(x = x1, var.dec = var.dec, labs = if(is.null(labs)) 1:n else labs) } ## mkCheckX() ## TODO: allow components (2,3) or (1,3) instead of always (1,2) => drop 'var.dec', 'sub' clusplot.default <- function(x, clus, diss = FALSE, s.x.2d = mkCheckX(x, diss), stand = FALSE, lines = 2, shade = FALSE, color = FALSE, labels = 0, plotchar = TRUE, col.p = "dark green", # was 5 (= shaded col) col.txt = col.p, col.clus = if(color) c(2, 4, 6, 3) else 5, cex = 1, cex.txt = cex, span = TRUE, add = FALSE, xlim = NULL, ylim = NULL, main = paste("CLUSPLOT(", deparse(substitute(x)),")"), sub = paste("These two components explain", round(100 * var.dec, digits = 2), "% of the point variability."), xlab = "Component 1", ylab = "Component 2", verbose = getOption("verbose"), ...) 
{ force(main) if(is.data.frame(x)) x <- data.matrix(x) if(!is.numeric(x)) stop("x is not numeric") ## FIXME: - if labels == 0 or == 4, do not need "labs" ## - if !missing(sub), do not need "var.dec" stopifnot(is.list(s.x.2d), c("x","labs","var.dec") %in% names(s.x.2d), (n <- nrow(x1 <- s.x.2d[["x"]])) > 0) labels1 <- s.x.2d[["labs"]] var.dec <- s.x.2d[["var.dec"]] ## --- The 2D space is setup and points are in x1[,] (n x 2) --- clus <- as.vector(clus) if(length(clus) != n) stop("The clustering vector is of incorrect length") clus <- as.factor(clus) if(anyNA(clus)) stop("NA-values are not allowed in clustering vector") if(stand) x1 <- scale(x1) levclus <- levels(clus) nC <- length(levclus) # the number of clusters d.x <- diff(range(x1[, 1])) d.y <- diff(range(x1[, 2])) z <- A <- vector("list", nC) loc <- matrix(0, nrow = nC, ncol = 2) d2 <- verhoud <- numeric(nC) ## num1 .. num6 : all used only once -- there are more constants anyway num3 <- 90 num6 <- 70 for(i in 1:nC) { ##------------- i-th cluster -------------- x <- x1[clus == levclus[i],, drop = FALSE ] aantal <- nrow(x) # number of observations in cluster [i] cov <- var(if(aantal == 1) { if(verbose) cat("cluster",i," has only one observation ..\n") rbind(x, c(0, 0)) } else x) x.1 <- range(x[, 1]) y.1 <- range(x[, 2]) notrank2 <- qr(cov, tol = 0.001)$rank != 2 if(!span && notrank2) { d2[i] <- 1 if((abs(diff(x.1)) > d.x/70) || (abs(diff(y.1)) > d.y/50)) { loc[i, ] <- c(x.1[1] + diff(x.1)/2, y.1[1] + diff(y.1)/2) a <- sqrt((loc[i, 1] - x.1[1])^2 + (loc[i, 2] - y.1[1])^2) a <- a + 0.05 * a num2 <- 40 if(abs(diff(x.1)) > d.x/70 ) { ind1 <- which.max(x[,1]) ind2 <- which.min(x[,1]) q <- atan((x[ind1, 2] - x[ind2, 2])/ (x[ind1, 1] - x[ind2, 1])) b <- if(d.y == 0) 1 else if(abs(diff(y.1)) > d.y/50) diff(y.1)/10 ## num1 <- 10 else d.y/num2 } else { b <- if(d.x == 0) 1 else d.x/num2 q <- pi/2 } D <- diag(c(a^2, b^2)) R <- rbind(c(cos(q), -sin(q)), c(sin(q), cos(q))) A[[i]] <- (R %*% D) %*% t(R) } else { a <- d.x/num3 b 
<- d.y/num6 if(a == 0) a <- 1 if(b == 0) b <- 1 A[[i]] <- diag(c(a^2, b^2)) loc[i, ] <- x[1, ] } oppervlak <- pi * a * b } else if(span && notrank2) { d2[i] <- 1 if(sum(x[, 1] != x[1, 1]) != 0 || sum(x[, 2] != x[1, 2]) != 0) { loc[i, ] <- c(x.1[1] + diff(x.1)/2, y.1[1] + diff(y.1)/2) a <- sqrt((loc[i, 1] - x.1[1])^2 + (loc[i, 2] - y.1[1])^2) if(any(x[, 1] != x[1, 1])) { ind1 <- which.max(x[,1]) ind2 <- which.min(x[,1]) q <- atan((x[ind1, 2] - x[ind2, 2])/ (x[ind1, 1] - x[ind2, 1])) } else { q <- pi/2 } b <- 1e-7 D <- diag(c(a^2, b^2)) R <- rbind(c(cos(q), -sin(q)), c(sin(q), cos(q))) A[[i]] <- (R %*% D) %*% t(R) } else { a <- d.x/num3 b <- d.y/num6 if(a == 0) a <- 1 if(b == 0) b <- 1 A[[i]] <- diag(c(a^2, b^2)) loc[i, ] <- x[1, ] } oppervlak <- pi * a * b } else { ## rank2 if(!span) { loc[i, ] <- colMeans(x) d2[i] <- max(mahalanobis(x, loc[i, ], cov)) ## * (1+ 0.01)^2 --- dropped factor for back-compatibility } else { ## span and rank2 if(verbose) cat("span & rank2 : calling \"spannel\" ..\n") k <- 2L res <- .C(spannel, aantal, ndep= k, dat = cbind(1., x), sqdist = double(aantal), l1 = double((k+1) ^ 2), double(k), double(k), prob = double(aantal), double(k+1), eps = (0.01),## convergence tol. maxit = 5000L, ierr = integer(1)) if(res$ierr != 0) ## MM : exactmve not available here ! warning("Error in Fortran routine for the spanning ellipsoid,\n rank problem??") cov <- cov.wt(x, res$prob) loc[i, ] <- cov$center ## NB: cov.wt() in R has extra wt[] scaling; revert here: cov <- cov$cov * (1 - sum(cov$wt^2)) d2[i] <- weighted.mean(res$sqdist, res$prob) if(verbose) cat("ellipse( A= (", format(cov[1,]),"*", format(cov[2,2]), "),\n\td2=", format(d2[i]), ", loc[]=", format(loc[i, ]), ")\n") } A[[i]] <- cov ## oppervlak (flam.) = area (Engl.) 
oppervlak <- pi * d2[i] * sqrt(cov[1, 1] * cov[2, 2] - cov[1, 2]^2) } z[[i]] <- ellipsoidPoints(A[[i]], d2[i], loc[i, ], n.half= 201) verhoud[i] <- aantal/oppervlak } ## end for( i-th cluster ) x.range <- do.call(range, lapply(z, `[`, i=TRUE, j = 1)) y.range <- do.call(range, lapply(z, `[`, i=TRUE, j = 2)) verhouding <- sum(verhoud[verhoud < 1e7]) if(verhouding == 0) verhouding <- 1 ## num4 <- 37 ; num5 <- 3 --- but '41' is another constant density <- 3 + (verhoud * 37)/verhouding density[density > 41] <- 41 if (span) { if (d.x == 0) ## diff(range(x[,1]) == 0 : x-coords all the same x.range <- x1[1, 1] + c(-1,1) if (d.y == 0) ## diff(range(x[,2]) == 0 : y-coords all the same y.range <- x1[1, 2] + c(-1,1) } if(is.null(xlim)) xlim <- x.range if(is.null(ylim)) ylim <- y.range if(length(col.p) < n) col.p <- rep(col.p, length= n) ## --- Now plotting starts --- ## "Main plot" -- if(!add) { plot(x1, xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab, main = main, type = if(plotchar) "n" else "p", # if(plotchar) add points later col = col.p, cex = cex, ...) if(!is.null(sub) && !is.na(sub) && nchar(sub) > 0) title(sub = sub, adj = 0) } if(color) { if(length(col.clus) < min(4,nC)) stop("'col.clus' should have length 4 when color is TRUE") i.verh <- order(verhoud) jInd <- if(nC > 4) pam(verhoud[i.verh], 4)$clustering else 1:nC for(i in 1:nC) { k <- i.verh[i] polygon(z[[k]], density = if(shade) density[k] else 0, col = col.clus[jInd[i]], ...) } col.clus <- col.clus[jInd][order(i.verh)] } else { for(i in 1:nC) polygon(z[[i]], density = if(shade) density[i] else 0, col = col.clus, ...) } ## points after polygon in order to write ON TOP: if(plotchar) { karakter <- 1:19 for(i in 1:nC) { iC <- clus == levclus[i] points(x1[iC, , drop = FALSE], cex = cex, pch = karakter[1+(i-1) %% 19], col = col.p[iC], ...) 
} } if(nC > 1 && (lines == 1 || lines == 2)) { ## Draw lines between all pairs of the nC cluster (centers) ## utilities for computing ellipse intersections: clas.snijpunt <- function(x, loc, m, n, p) { if ( !is.na(xm <- x[1,m]) && loc[n, m] <= xm && xm <= loc[p, m]) x[1, ] else if(!is.na(xm <- x[2,m]) && loc[n, m] <= xm && xm <= loc[p, m]) x[2, ] else NA } coord.snijp1 <- function(x, gemid) x[2, 2] - 2 * x[1, 2] * gemid + x[1, 1] * gemid^2 coord.snijp2 <- function(x, d2, y) ((x[1, 1] * x[2, 2] - x[1, 2]^2) * d2)/y coord.snijp3 <- function(xx, y, gemid) { sy <- sqrt(y) sy <- c(sy, -sy) cbind(xx[1] + sy, xx[2] + gemid*sy) } afstand <- matrix(0, ncol = nC, nrow = nC) for(i in 1:(nC - 1)) { for(j in (i + 1):nC) { gemid <- (loc[j, 2] - loc[i, 2])/(loc[j, 1] - loc[i, 1]) s0 <- coord.snijp1(A[[i]], gemid) b0 <- coord.snijp2(A[[i]], d2[i], s0) snijp.1 <- coord.snijp3(loc[i,], y=b0, gemid) s1 <- coord.snijp1(A[[j]], gemid) b1 <- coord.snijp2(A[[j]], d2[j], s1) snijp.2 <- coord.snijp3(loc[j,], y=b1, gemid) if(loc[i, 1] != loc[j, 1]) { if(loc[i, 1] < loc[j, 1]) { punt.1 <- clas.snijpunt(snijp.1, loc, 1, i, j) punt.2 <- clas.snijpunt(snijp.2, loc, 1, i, j) } else { punt.1 <- clas.snijpunt(snijp.1, loc, 1, j, i) punt.2 <- clas.snijpunt(snijp.2, loc, 1, j, i) } } else { if(loc[i, 2] < loc[j, 2]) { punt.1 <- clas.snijpunt(snijp.1, loc, 2, i, j) punt.2 <- clas.snijpunt(snijp.2, loc, 2, i, j) } else { punt.1 <- clas.snijpunt(snijp.1, loc, 2, j, i) punt.2 <- clas.snijpunt(snijp.2, loc, 2, j, i) } } if(is.na(punt.1[1]) || is.na(punt.2[1]) || (sqrt((punt.1[1] - loc[i, 1])^2 + (punt.1[2] - loc[i, 2])^2) + sqrt((punt.2[1] - loc[j, 1])^2 + (punt.2[2] - loc[j, 2])^2)) > sqrt((loc[j, 1] - loc[i, 1])^2 + (loc[j, 2] - loc[i, 2])^2)) { afstand[i, j] <- NA } else if(lines == 1) { afstand[i, j] <- sqrt((loc[i, 1] - loc[j, 1])^2 + (loc[i, 2] - loc[j, 2])^2) segments(loc[i, 1], loc[i, 2], loc[j, 1], loc[j, 2], col = 6, ...) 
} else { ## lines == 2 afstand[i, j] <- sqrt((punt.1[1] - punt.2[1])^2 + (punt.1[2] - punt.2[2])^2) segments(punt.1[1], punt.1[2], punt.2[1], punt.2[2], col = 6, ...) } } } afstand <- t(afstand) + afstand } else afstand <- NULL if(labels) { if(labels == 1) { for(i in 1:nC) { ## add cluster border points m <- nrow(z[[i]]) ni <- length(ii <- seq(1, m, by = max(1, m %/% 40))) x1 <- rbind(x1, z[[i]][ii, ]) labels1 <- c(labels1, rep(levclus[i], ni)) ## identify() only allows one color: ##col.txt <- c(col.txt, rep(col.clus[if(color) i else 1], ni)) } identify(x1, labels = labels1, col = col.txt[1]) } else { ### FIXME --- 'cex.txt' but also allow to specify 'cex' (for the points) ??? Stext <- function(xy, labs, ...) { ## FIXME: these displacements are not quite ok! xy[, 1] <- xy[, 1] + diff(x.range)/130 xy[, 2] <- xy[, 2] + diff(y.range)/50 text(xy, labels = labs, ...) } if(labels == 3 || labels == 2) Stext(x1, labels1, col = col.txt, cex = cex.txt, ...) if(labels %in% c(2,4,5)) { maxima <- t(sapply(z, `[`, i=201, j=1:2)) Stext(maxima, levclus, font = 4, col = col.clus, cex = cex, ...) } if(labels == 5) identify(x1, labels = labels1, col = col.txt[1]) } } density[density == 41] <- NA invisible(list(Distances = afstand, Shading = density)) } clusplot.partition <- function(x, main = NULL, dist = NULL, ...) { if(is.null(main) && !is.null(x$call)) main <- paste("clusplot(",format(x$call),")", sep="") if(length(x$data) != 0 && (!anyNA(x$data) || data.class(x) == "clara")) clusplot.default(x$data, x$clustering, diss = FALSE, main = main, ...) else if(!is.null(dist)) clusplot.default(dist, x$clustering, diss = TRUE, main = main, ...) else if(!is.null(x$diss)) clusplot.default(x$diss, x$clustering, diss = TRUE, main = main, ...) 
else { ## try to find "x$diss" by looking at the pam() call: if(!is.null(x$call)) { xD <- try(eval(x$call[[2]], envir = parent.frame())) if(inherits(xD, "try-error") || !inherits(xD, "dist")) stop(gettextf("no diss nor data found, nor the original argument of %s", deparse(x$call))) ## else ## warning("both 'x$diss' and 'dist' are empty; ", ## "trying to find the first argument of ", deparse(x$call)) clusplot.default(xD, x$clustering, diss = TRUE, main = main, ...) } else stop("no diss nor data found for clusplot()'") } } cluster/R/ellipsoidhull.R0000644000175100001440000000772512540507660015166 0ustar hornikusers#### ellipsoidhull : Find (and optionally draw) #### ----------- the smallest ellipsoid containining a set of points #### #### Just making the algorithms in clusplot() available more generally #### ( --> ./plotpart.q ) ### Author: Martin Maechler, Date: 21 Jan 2002, 15:41 ellipsoidhull <- function(x, tol = 0.01, maxit = 5000, ret.wt = FALSE, ret.sqdist = FALSE, ret.pr = FALSE) { if(!is.matrix(x) || !is.numeric(x)) stop("'x' must be numeric n x p matrix") if(anyNA(x)) { warning("omitting NAs") x <- na.omit(x) } n <- nrow(x) if(n == 0) stop("no points without missing values") p <- ncol(x) res <- .C(spannel, n, ndep= p, dat = cbind(1., x), sqdist = double(n), l1 = double((p+1) ^ 2), double(p), double(p), prob = double(n), double(p+1), eps = as.double(tol), maxit = as.integer(maxit), ierr = integer(1))# 0 or non-zero if(res$ierr != 0) cat("Error in Fortran routine computing the spanning ellipsoid,", "\n probably collinear data\n", sep="") if(any(res$prob < 0) || all(res$prob == 0)) stop("computed some negative or all 0 probabilities") conv <- res$maxit < maxit if(!conv) warning(gettextf("algorithm possibly not converged in %d iterations", maxit)) conv <- conv && res$ierr == 0 cov <- cov.wt(x, res$prob) ## cov.wt() in R has extra wt[] scaling; revert here res <- list(loc = cov$center, cov = cov$cov * (1 - sum(cov$wt^2)), d2 = weighted.mean(res$sqdist, res$prob), 
wt = if(ret.wt) cov$wt, sqdist = if(ret.sqdist) res$sqdist, prob= if(ret.pr) res$prob, tol = tol, eps = max(res$sqdist) - p, it = res$maxit, maxit= maxit, ierr = res$ierr, conv = conv) class(res) <- "ellipsoid" res } print.ellipsoid <- function(x, digits = max(1, getOption("digits") - 2), ...) { d <- length(x$loc) cat("'ellipsoid' in", d, "dimensions:\n center = (", format(x$loc, digits=digits), "); squared ave.radius d^2 = ", format(x$d2, digits=digits), "\n and shape matrix =\n") print(x$cov, digits = digits, ...) cat(" hence,",if(d==2)"area" else "volume"," = ", format(volume(x), digits=digits),"\n") if(!is.null(x$conv) && !x$conv) { cat("\n** Warning: ** the algorithm did not terminate reliably!\n ", if(x$ierr) "most probably because of collinear data" else "(in the available number of iterations)", "\n") } invisible(x) } volume <- function(object) UseMethod("volume") volume.ellipsoid <- function(object) { A <- object$cov pi * object$d2 * sqrt(det(A)) } ## For p = 2 : ## Return (x[i],y[i]) points, i = 1:n, on boundary of ellipse, given ## by 2 x 2 matrix A[], origin 'loc' and d(xy, loc) ^2 = 'd2' ellipsoidPoints <- function(A, d2, loc, n.half = 201) { if(length(d <- dim(A)) != 2 || (p <- d[1]) != d[2]) stop("'A' must be p x p cov-matrix defining an ellipsoid") if(p == 2) { detA <- A[1, 1] * A[2, 2] - A[1, 2]^2 yl2 <- A[2, 2] * d2 # = (y_max - y_loc)^2 y <- seq( - sqrt(yl2), sqrt(yl2), length = n.half) sqrt.discr <- sqrt(detA * pmax(0, yl2 - y^2))/A[2, 2] sqrt.discr[c(1, n.half)] <- 0 b <- loc[1] + A[1, 2]/A[2, 2] * y y <- loc[2] + y return(rbind(cbind( b - sqrt.discr, y), cbind(rev(b + sqrt.discr), rev(y)))) } else { ## p >= 3 detA <- det(A) ##-- need something like polar coordinates stop("ellipsoidPoints() not yet implemented for p >= 3 dim.") } } predict.ellipsoid <- function(object, n.out = 201, ...) 
ellipsoidPoints(object$cov, d2 = object$d2, loc= object$loc, n.half = n.out) cluster/R/pam.q0000644000175100001440000002044212540507660013120 0ustar hornikusers#### PAM : Partitioning Around Medoids #### --- $Id: pam.q 6953 2015-06-18 09:30:24Z maechler $ pam <- function(x, k, diss = inherits(x, "dist"), metric = "euclidean", medoids = NULL, stand = FALSE, cluster.only = FALSE, do.swap = TRUE, keep.diss = !diss && !cluster.only && n < 100, keep.data = !diss && !cluster.only, ## use.Call = TRUE, ## for testing, comparing .C() <-> .Call() will become TRUE ## ---------------- pamonce = FALSE, trace.lev = 0) { stopifnot(length(cluster.only) == 1, length(trace.lev) == 1) use.Call <- TRUE ## if using new .Call() instead of old .C() if((diss <- as.logical(diss))) { ## check type of input vector if(anyNA(x)) stop("NA values in the dissimilarity matrix not allowed.") if(data.class(x) != "dissimilarity") { # try to convert to if(!is.null(dim(x))) { x <- as.dist(x) # or give an error } else { ## possibly convert input *vector* if(!is.numeric(x) || is.na(n <- sizeDiss(x))) stop("'x' is not and cannot be converted to class \"dissimilarity\"") attr(x, "Size") <- n } class(x) <- dissiCl if(is.null(attr(x,"Metric"))) attr(x, "Metric") <- "unspecified" } if(keep.data) stop("Cannot keep data when 'x' is a dissimilarity!") ## adapt S dissimilarities to Fortran: ## convert upper matrix, read by rows, to lower matrix, read by rows. n <- attr(x, "Size") dv <- x[lower.to.upper.tri.inds(n)] ## prepare arguments for the Fortran call dv <- c(0, dv) ## <- internally needed {FIXME! 
memory hog!} storage.mode(dv) <- "double" jp <- 1 mdata <- FALSE ndyst <- 0 if(!use.Call) x2 <- double()# unused in this case } else { ## check input matrix and standardize, if necessary x <- data.matrix(x)# dropping "automatic rownames" compatibly with daisy() if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.") x2 <- x ; dimnames(x2) <- NULL if(stand) x2 <- scale(x2, scale = apply(x2, 2, meanabsdev)) ## put info about metric, size and NAs in arguments for the Fortran call ndyst <- if(metric == "manhattan") 2 else 1 n <- nrow(x2) jp <- ncol(x2) if((mdata <- any(inax <- is.na(x2)))) { # TRUE if x[] has any NAs jtmd <- integer(jp) jtmd[apply(inax, 2L, any)] <- -1L ## VALue for MISsing DATa valmisdat <- 1.1* max(abs(range(x2, na.rm=TRUE))) x2[inax] <- valmisdat } storage.mode(x2) <- "double" if(!use.Call) dv <- double(1 + (n * (n - 1))/2) } if((k <- as.integer(k)) < 1 || k >= n) stop("Number of clusters 'k' must be in {1,2, .., n-1}; hence n >= 2") if(is.null(medoids)) { # default: using "build & swap" to determine medoids" if(!use.Call) medoids <- integer(k)# all 0 -> will be used as 'code' in C } else { ## 'fixme': consider sort(medoids) {and rely on it in ../src/pam.c } if(!is.integer(medoids)) medoids <- as.integer(medoids) if(length(medoids) != k || any(medoids < 1L) || any(medoids > n) || any(duplicated(medoids))) stop(gettextf( "'medoids' must be NULL or vector of %d distinct indices in {1,2, .., n}, n=%d", k, n)) ## use observation numbers 'medoids' as starting medoids for 'swap' only } nisol <- integer(if(cluster.only) 1 else k) if(do.swap) nisol[1] <- 1L if(use.Call) res <- .Call(cl_Pam, k, n, !diss, # == do_diss: compute d[i,j] them from x2[] and allocate in C if(diss) dv else x2, !cluster.only, ## == all_stats == "old" obj[1+ 0] == 0 medoids, do.swap, trace.lev, keep.diss, pamonce, ## only needed if(!diss) [ <=> if(do_diss) ] : if(mdata) rep(valmisdat, jp) else double(1), # valmd if(mdata) jtmd else integer(jp), # jtmd as.integer(ndyst)) # 
dist_kind else res <- .C(cl_pam, as.integer(n), as.integer(jp), k, x = x2, # only accessed if(!diss) dys = dv, jdyss = as.integer(diss), if(mdata) rep(valmisdat, jp) else double(1), # valmd if(mdata) jtmd else integer(jp), # jtmd as.integer(ndyst), # ndyst integer(n), # nsend[] logical(n), # nrepr[] integer(if(cluster.only) 1 else n), # nelem[] double(n), # radus[] double(n), # damer[] avsil = double(n), # avsyl 'ttd' double(n), # separ[] ttsil = double(1), # ttsyl obj = as.double(c(cluster.only, trace.lev)),# in & out! ##= med = medoids, # med in & out{if (!cluster.only)} clu = integer(n), # ncluv clusinf = if(cluster.only) 0. else matrix(0., k, 5), silinf = if(cluster.only) 0. else matrix(0., n, 4), isol = nisol, # nisol: integer(if(cluster.only) 1 else k) as.integer(pamonce)) # care!! ## use from result: clu, dys, jdyss, med, silinf, obj, isol, clusinf, avsil, ttsil ## 'dys' : only used if(keep.diss) ## Error if have NA's in diss: if(!diss && ((use.Call && is.integer(res)) || (!use.Call && res$jdyss == -1))) stop("No clustering performed, NAs in the computed dissimilarity matrix.") xLab <- if(diss) attr(x, "Labels") else dimnames(x)[[1]] r.clu <- res$clu if(length(xLab) > 0) names(r.clu) <- xLab if(cluster.only) return(r.clu) ## Else, usually medID <- res$med if(any(medID <= 0)) stop("error from .C(cl_pam, *): invalid medID's") sildim <- res$silinf[, 4] if(diss) { ## add labels to Fortran output r.med <- if(length(xLab) > 0) { sildim <- xLab[sildim] xLab[medID] } else medID } else { if(keep.diss) { ## adapt Fortran output to S: ## convert lower matrix, read by rows, to upper matrix, read by rows. 
disv <- res$dys[-1] disv[disv == -1] <- NA disv <- disv[upper.to.lower.tri.inds(n)] class(disv) <- dissiCl attr(disv, "Size") <- nrow(x) attr(disv, "Metric") <- metric attr(disv, "Labels") <- dimnames(x)[[1]] } ## add labels to Fortran output r.med <- x[medID, , drop=FALSE] if(length(xLab) > 0) sildim <- xLab[sildim] } ## add names & dimnames to Fortran output r.obj <- structure(res$obj, .Names = c("build", "swap")) r.isol <- factor(res$isol, levels = 0:2, labels = c("no", "L", "L*")) names(r.isol) <- 1:k r.clusinf <- res$clusinf dimnames(r.clusinf) <- list(NULL, c("size", "max_diss", "av_diss", "diameter", "separation")) ## construct S object r <- list(medoids = r.med, id.med = medID, clustering = r.clu, objective = r.obj, isolation = r.isol, clusinfo = r.clusinf, silinfo = if(k != 1) { silinf <- res$silinf[, -4, drop=FALSE] dimnames(silinf) <- list(sildim, c("cluster", "neighbor", "sil_width")) list(widths = silinf, clus.avg.widths = res$avsil[1:k], avg.width = res$ttsil) }, diss = if(keep.diss) { if(diss) x else disv }, call = match.call()) if(keep.data) { ## have !diss if(mdata) x2[x2 == valmisdat] <- NA r$data <- structure(x2, dimnames = dimnames(x)) } class(r) <- c("pam", "partition") r } ## non-exported: .print.pam <- function(x, ...) { cat("Medoids:\n"); print(cbind(ID = x$id.med, x$medoids), ...) cat("Clustering vector:\n"); print(x$clustering, ...) cat("Objective function:\n"); print(x$objective, ...) } print.pam <- function(x, ...) { .print.pam(x, ...) cat("\nAvailable components:\n") print(names(x), ...) invisible(x) } summary.pam <- function(object, ...) { class(object) <- "summary.pam" object } print.summary.pam <- function(x, ...) { .print.pam(x, ...) cat("\nNumerical information per cluster:\n"); print(x$clusinfo, ...) cat("\nIsolated clusters:\n L-clusters: ") print(names(x$isolation[x$isolation == "L"]), quote = FALSE, ...) cat(" L*-clusters: ") print(names(x$isolation[x$isolation == "L*"]), quote = FALSE, ...) 
if(length(x$silinfo) != 0) { cat("\nSilhouette plot information:\n") print(x$silinfo[[1]], ...) cat("Average silhouette width per cluster:\n") print(x$silinfo[[2]], ...) cat("Average silhouette width of total data set:\n") print(x$silinfo[[3]], ...) } if(!is.null(x$diss)) { ## Dissimilarities: cat("\n"); print(summary(x$diss, ...)) } cat("\nAvailable components:\n"); print(names(x), ...) invisible(x) } cluster/R/coef.R0000644000175100001440000000253011674345261013222 0ustar hornikusers#### R-interface to Agglomerative / Divisive coefficient #### coef.twins <- function(object, ...) { if(inherits(object, "agnes")) object$ac else if(inherits(object, "diana")) object$dc else stop("invalid 'twins' object") } coef.hclust <- function(object, ...) { ## Author: Martin Maechler, Date: 27 Nov 2004 ## Now "really" using $merge _and_ $height -- assuming they match! ht <- object$height mrg <- object$merge nh <- length(ht) stopifnot(nh > 0, is.matrix(mrg), dim(mrg) == c(nh,2), is.numeric(ht), is.numeric(mrg), !is.unsorted(ht))# then they match with merge ## stopifnot(all.equal(1:n, sort(-mrg[mrg < 0]))) 1 - sum(rowSums(mrg < 0) * ht) / max(ht) / (nh+1) } ## Note this is (the only!) direct interface to bncoef(), ## ---- which is used internally both in agnes() and diana() : coefHier <- function(object) { ## Purpose: Compute agglomerative *or* divisive coefficient from hclust/agnes/diana ## ---------------------------------------------------------------------- ## Author: Martin Maechler, Date: 27 Nov 2004 nh <- length(ht <- object$height) stopifnot(nh > 0, is.numeric(ht)) .C(R_bncoef, n = as.integer(nh + 1L), ban= as.double(c(0., ht)),# <-- is this really tbe ban[]nner, as in ../src/twins.c ? 
cf = double(1))$cf } cluster/R/daisy.q0000644000175100001440000001640112540507660013454 0ustar hornikusers daisy <- function(x, metric = c("euclidean", "manhattan", "gower"), stand = FALSE, type = list(), weights = rep.int(1, p)) { ## check type of input matrix if(length(dx <- dim(x)) != 2 || !(is.data.frame(x) || is.numeric(x))) stop("x is not a dataframe or a numeric matrix.") n <- dx[1]# nrow p <- dx[2]# ncol varnms <- dimnames(x)[[2]] pColl <- function(n) paste(n, collapse = ", ") if(length(type)) { if(!is.list(type) || is.null(ntyp <- names(type)) || any(ntyp == "")) stop(gettextf("invalid %s; must be named list", sQuote("type"))) ## check each component to be valid column names or numbers: for(nt in ntyp) { cvec <- type[[nt]] ct <- paste0("type$", nt) if(is.character(cvec)) { if(!is.null(varnms) && !all(cvec %in% varnms)) stop(gettextf("%s has invalid column names", ct)) } else if(is.numeric(cvec)) { if(!all(1 <= cvec & cvec <= p)) stop(gettextf("%s must be in 1:ncol(x)", ct)) } else stop(gettextf("%s must contain column names or numbers", ct)) } tA <- type$asymm tS <- type$symm if(!is.null(tA) || !is.null(tS)) { ## tA and tS might be character and integer! 
d.bin <- cbind(as.data.frame(x[, tA, drop= FALSE]), x[, tS, drop= FALSE]) lenB <- sapply(lapply(d.bin, function(y) levels(as.factor(y))), length) if(any(lenB > 2)) stop("at least one binary variable has more than 2 levels.") if(any(lenB < 2)) warning("at least one binary variable has not 2 different levels.") ## Convert factors to integer, such that ("0","1") --> (0,1): if(any(is.f <- sapply(d.bin, is.factor))) d.bin[is.f] <- lapply(d.bin[is.f], function(f) as.integer(as.character(f))) if(!all(sapply(d.bin, function(y) is.logical(y) || all(sort(unique(as.numeric(y[!is.na(y)])))%in% 0:1)))) stop("at least one binary variable has values not in {0,1,NA}") } } ## transform variables and construct 'type' vector if(is.data.frame(x)) { type2 <- sapply(x, data.class) x <- data.matrix(x) } else { ## matrix type2 <- rep("numeric", p) names(type2) <- colnames(x) } if(length(type)) { tT <- type$ ordratio tL <- type$ logratio x[, names(type2[tT])] <- unclass(as.ordered(x[, names(type2[tT])])) x[, names(type2[tL])] <- log10( x[, names(type2[tL])]) type2[tA] <- "A" type2[tS] <- "S" type2[tT] <- "T" # was "O" (till 2000-12-14) accidentally ! 
} type2[tI <- type2 %in% c("numeric", "integer") ] <- "I" if(n > 9 && any(tI) && any(iBin <- apply(x[, tI, drop = FALSE], 2, function(v) length(table(v)) == 2))) warning(gettextf("binary variable(s) %s treated as interval scaled", pColl(which(tI)[iBin]))) type2[type2 == "ordered"] <- "O" type2[type2 == "factor"] <- "N" if(any(ilog <- type2 == "logical")) { warning(sprintf(ngettext(sum(ilog), "setting 'logical' variable %s to type 'asymm'", "setting 'logical' variables %s to type 'asymm'"), pColl(which(ilog))), domain = NA) type2[ilog] <- "A" } ## Note: We have 2 status codings: ndyst = (0,1,2) and jdat = (1,2); ## the latter is superfluous in principle ## standardize, if necessary all.I <- all(type2 == "I") if(all.I && { metric <- match.arg(metric); metric != "gower" }) { if(stand) { x <- scale(x, center = TRUE, scale = FALSE) #-> 0-means sx <- colMeans(abs(x), na.rm = TRUE)# can still have NA's if(0 %in% sx) { warning(gettextf( "%s has constant columns %s; these are standardized to 0", sQuote("x"), pColl(which(sx == 0)))) sx[sx == 0] <- 1 } x <- scale(x, center = FALSE, scale = sx) } jdat <- 2L ndyst <- if(metric == "manhattan") 2L else 1L } else { ## mixed case or explicit "gower" if(!missing(metric) && metric != "gower" && !all.I) warning("with mixed variables, metric \"gower\" is used automatically") ## FIXME: think of a robust alternative scaling to ## Gower's (x - min(x)) / (max(x) - min(x)) colR <- apply(x, 2, range, na.rm = TRUE) colmin <- colR[1,] sx <- colR[2,] - colmin if(any(sx == 0)) sx[sx == 0] <- 1 x <- scale(x, center = colmin, scale = sx) jdat <- 1L ndyst <- 0L ## weights only used in this "gower" case if(length(weights) == 1) weights <- rep.int(weights, p) else if(length(weights) != p) stop("'weights' must be of length p (or 1)") } ## type2 <- paste(type2, collapse = "") typeCodes <- c('A','S','N','O','I','T') ## 1 2 3 4 5 6 --> passed to Fortran below type3 <- match(type2, typeCodes)# integer if(any(ina <- is.na(type3))) stop(gettextf("invalid 
type %s for column numbers %s", type2[ina], pColl(which(ina)))) if((mdata <- any(inax <- is.na(x)))) { # TRUE if x[] has any NAs jtmd <- integer(p) jtmd[apply(inax, 2L, any)] <- -1L ## VALue for MISsing DATa valmisdat <- 1.1* max(abs(range(x, na.rm=TRUE))) x[inax] <- valmisdat } ## call Fortran routine storage.mode(x) <- "double" disv <- .Fortran(cl_daisy, n, p, x, if(mdata) rep(valmisdat, p) else double(1), as.double(weights), if(mdata) jtmd else integer(1), jdat, type3, # vtype ndyst, as.integer(mdata), dis = double((n * (n - 1))/2), NAOK = TRUE# only to allow "+- Inf" )$dis ## adapt Fortran output to S: ## convert lower matrix, read by rows, to upper matrix, read by rows. disv[disv == -1] <- NA full <- matrix(0, n, n) full[!lower.tri(full, diag = TRUE)] <- disv disv <- t(full)[lower.tri(full)] ## give warning if some dissimilarities are missimg if(anyNA(disv)) attr(disv, "NA.message") <- "NA-values in the dissimilarity matrix !" ## construct S object -- "dist" methods are *there* ! class(disv) <- dissiCl # see ./0aaa.R attr(disv, "Labels") <- dimnames(x)[[1]] attr(disv, "Size") <- n attr(disv, "Metric") <- if(!ndyst) "mixed" else metric if(!ndyst) attr(disv, "Types") <- typeCodes[type3] disv } print.dissimilarity <- function(x, diag = NULL, upper = NULL, digits = getOption("digits"), justify = "none", right = TRUE, ...) { cat("Dissimilarities :\n") NextMethod("print")##-> stats:::print.dist(..) cat("\n") if(!is.null(attr(x, "na.message"))) cat("Warning : ", attr(x, "NA.message"), "\n") cat("Metric : ", attr(x, "Metric"), if(!is.null(aT <- attr(x,"Types"))) paste("; Types =", paste(aT, collapse=", ")), "\n") cat("Number of objects : ", attr(x, "Size"), "\n", sep="") invisible(x) } summary.dissimilarity <- function(object, digits = max(3, getOption("digits") - 2), ...) ## 'digits': want a bit higher precision { sx <- summary(as.vector(object), digits = digits, ...) 
at <- attributes(object) r <- c(list(summ = sx, n = length(object)), at[names(at) != "class"]) class(r) <- "summary.dissimilarity" r } print.summary.dissimilarity <- function(x, ...) { cat(x$n, "dissimilarities, summarized :\n") print(x$summ, ...) cat("Metric : ", x $ Metric, if(!is.null(aT <- x $ Types)) paste("; Types =", paste(aT, collapse=", ")), "\n") cat("Number of objects : ", x $ Size, "\n", sep="") if(!is.null(x $ na.message)) cat("Warning : ", x $ NA.message, "\n") invisible(x) } cluster/R/internal.R0000644000175100001440000000247712461440602014122 0ustar hornikusers#### Cluster - Internal Utilities #### ============================ (new by Martin Maechler) ## This was size(); seems slightly useful in general sizeDiss <- function(d) { ## find 'n' for d == dissimilarity-like(), i.e. length(d)= n(n-1)/2 discr <- 1 + 8 * length(d) sqrtdiscr <- round(sqrt(discr)) if(sqrtdiscr^2 == discr) (1 + sqrtdiscr)/2 else NA } ##' Return indices to *permute* "dissimilarity" / "dist" entries for C (ex-Fortran) code setup ##' ##' Currently always used as: ##' n <- attr(x, "Size") ##' dv <- x[lower.to.upper.tri.inds(n)] ##' -->> FIXME: eventually do the above directly in C ##' @param n "Size" = number of objects, underlying the dist/dissimilarity ##' used in ./agnes.q, ./clara.q, ./diana.q und ./pam.q : ##' *somewhat* related to Matrix:::indTri() lower.to.upper.tri.inds <- function(n) { n1 <- as.integer(n - 1) if(n1 < 1) stop("'n' must be >= 2") else if(n1 == 1) 1L else rep(seq_len(n1), seq_len(n1)) + c(0L, unlist(lapply(2:n1, function(k) cumsum(c(0L, (n - 2L):(n - k)))))) } upper.to.lower.tri.inds <- function(n) { if((n2 <- as.integer(n - 2L)) < 0) stop("'n' must be >= 2") rep(1L + cumsum(0:n2), (n - 1):1) + unlist(lapply(0:n2, function(k) cumsum(k:n2))) } meanabsdev <- function(y) mean(abs(y - mean(y, na.rm = TRUE)), na.rm = TRUE) cluster/R/diana.q0000644000175100001440000001113212540507660013413 0ustar hornikusers### $Id: diana.q 6953 2015-06-18 09:30:24Z maechler $ 
diana <- function(x, diss = inherits(x, "dist"), metric = "euclidean", stand = FALSE, ##_not_yet stop.at.k = FALSE, keep.diss = n < 100, keep.data = !diss, trace.lev = 0) { if((diss <- as.logical(diss))) { ## check type of input vector if(anyNA(x)) stop("NA values in the dissimilarity matrix not allowed.") if(data.class(x) != "dissimilarity") { # try to convert to if(!is.null(dim(x))) { x <- as.dist(x) # or give an error } else { ## possibly convert input *vector* if(!is.numeric(x) || is.na(n <- sizeDiss(x))) stop("'x' is not and cannot be converted to class \"dissimilarity\"") attr(x, "Size") <- n } class(x) <- dissiCl if(is.null(attr(x,"Metric"))) attr(x, "Metric") <- "unspecified" } n <- as.integer(attr(x, "Size")) dv <- x[lower.to.upper.tri.inds(n)] ## prepare arguments for the Fortran call dv <- c(0., dv)# double jp <- 1L mdata <- FALSE ndyst <- 0 x2 <- double(1) } else { ## check input matrix and standardize, if necessary x <- data.matrix(x) if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.") x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x ndyst <- if(metric == "manhattan") 2 else 1 n <- nrow(x2) jp <- ncol(x2) if((mdata <- any(inax <- is.na(x2)))) { # TRUE if x[] has any NAs jtmd <- integer(jp) jtmd[apply(inax, 2L, any)] <- -1L ## VALue for MISsing DATa valmisdat <- 1.1* max(abs(range(x2, na.rm=TRUE))) x2[inax] <- valmisdat } dv <- double(1 + (n * (n - 1))/2) } stopifnot(length(trace.lev <- as.integer(trace.lev)) == 1) ##_not_yet stopifnot(is.logical(stop.at.k) || ##_not_yet (is.numeric(stop.at.k) && 1 <= stop.at.k && stop.at.k <= n)) C.keep.diss <- keep.diss && !diss res <- .C(twins, n, jp, as.double(x2), dv, dis = double(if(C.keep.diss) length(dv) else 1), jdyss = if(C.keep.diss) diss + 10L else as.integer(diss), if(mdata) rep(valmisdat, jp) else double(1), if(mdata) jtmd else integer(jp), as.integer(ndyst), 2L,# jalg = 2 <==> DIANA 0L, ##_not_yet as.integer(stop.at.k),# default: 0 do *not* stop early, integer(n), ner = 
integer(n), ban = double(n), dc = double(1), double(1), # { unused for diana() } merge = matrix(0L, n - 1, 2), # integer trace = trace.lev) if(!diss) { ## give warning if some dissimilarities are missing. if(res$jdyss == -1) stop("No clustering performed, NA's in dissimilarity matrix.\n") if(keep.diss) { ## adapt Fortran output to S: ## convert lower matrix, read by rows, to upper matrix, read by rows. disv <- res$dis[-1] disv[disv == -1] <- NA disv <- disv[upper.to.lower.tri.inds(n)] class(disv) <- dissiCl attr(disv, "Size") <- nrow(x) attr(disv, "Metric") <- metric attr(disv, "Labels") <- dimnames(x)[[1]] } ## add labels to Fortran output if(length(dimnames(x)[[1]]) != 0) order.lab <- dimnames(x)[[1]][res$ner] } else { if(keep.diss) disv <- x ## add labels to Fortran output if(length(attr(x, "Labels")) != 0) order.lab <- attr(x, "Labels")[res$ner] } clustering <- list(order = res$ner, height = res$ban[-1], dc = res$dc, merge = res$merge, diss = if(keep.diss)disv, call = match.call()) if(exists("order.lab")) clustering$order.lab <- order.lab if(keep.data && !diss) { if(mdata) x2[x2 == valmisdat] <- NA clustering$data <- x2 } class(clustering) <- c("diana", "twins") clustering } print.diana <- function(x, ...) { cat("Merge:\n") print(x$merge, ...) cat("Order of objects:\n") print(if (length(x$order.lab) != 0) x$order.lab else x$order, quote = FALSE, ...) cat("Height:\n") print(x$height, ...) cat("Divisive coefficient:\n") print(x$dc, ...) cat("\nAvailable components:\n") print(names(x), ...) invisible(x) } summary.diana <- function(object, ...) { class(object) <- "summary.diana" object } print.summary.diana <- function(x, ...) { cat("Merge:\n"); print(x$merge, ...) cat("Order of objects:\n") print(if(length(x$order.lab)) x$order.lab else x$order, quote = FALSE, ...) cat("Height:\n"); print(x$height, ...) cat("Divisive coefficient:\n"); print(x$dc, ...) 
if(!is.null(x$diss)) { ## Dissimilarities: cat("\n"); print(summary(x$diss, ...)) } cat("\nAvailable components:\n"); print(names(x), ...) invisible(x) } cluster/R/mona.q0000644000175100001440000000627212124362341013273 0ustar hornikusers mona <- function(x) { ## check type of input matrix if(!is.matrix(x) && !is.data.frame(x)) stop("x must be a matrix or data frame.") if(!all(vapply(lapply(as.data.frame(x), function(y) levels(as.factor(y))), length, 1) == 2)) stop("All variables must be binary (e.g., factor with 2 levels).") n <- nrow(x) jp <- ncol(x) ## change levels of input matrix x2 <- apply(as.matrix(x), 2, function(x) as.integer(factor(x))) - 1L x2[is.na(x2)] <- 2:2 ## was ## x2 <- apply(as.matrix(x), 2, factor) ## x2[x2 == "1"] <- "0" ## x2[x2 == "2"] <- "1" ## x2[is.na(x2)] <- "2" ## storage.mode(x2) <- "integer" ## call Fortran routine res <- .Fortran(cl_mona, as.integer(n), as.integer(jp), x2 = x2,# x[,] error = 0L, nban = integer(n), ner = integer(n), integer(n), lava = integer(n), integer(jp)) ## stop with a message when two many missing values: if(res$error != 0) { ## NB: Need "full simple strings below, to keep it translatable": switch(res$error, ## 1 : stop("No clustering performed, an object was found with all values missing."), ## 2 : stop("No clustering performed, found variable with more than half values missing."), ## 3 : never triggers because of binary check above stop("No clustering performed, a variable was found with all non missing values identical."), ## 4 : stop("No clustering performed, all variables have at least one missing value.") ) } ##O res$x2 <- matrix(as.numeric(substring(res$x2, ##O 1:nchar(res$x2), 1:nchar(res$x2))), ##O n, jp) storage.mode(res$x2) <- "integer" # keeping dim() dimnames(res$x2) <- dnx <- dimnames(x) ## add labels to Fortran output if(length(dnx[[2]]) != 0) { lava <- as.character(res$lava) lava[lava != "0"] <- dnx[[2]][res$lava] lava[lava == "0"] <- "NULL" res$lava <- lava } ## construct "mona" object 
clustering <- list(data = res$x2, order = res$ner, variable = res$lava[ -1 ], step = res$nban[-1], call = match.call()) if(length(dnx[[1]]) != 0) clustering$order.lab <- dnx[[1]][res$ner] class(clustering) <- "mona" clustering } print.mona <- function(x, ...) { cat("Revised data:\n") print(x$data, quote = FALSE, ...) cat("Order of objects:\n") print(if (length(x$order.lab) != 0) x$order.lab else x$order, quote = FALSE, ...) cat("Variable used:\n") print(x$variable, quote = FALSE, ...) cat("Separation step:\n") print(x$step, ...) cat("\nAvailable components:\n") print(names(x), ...) invisible(x) } ## FIXME: These should differ from print() summary.mona <- function(object, ...) { class(object) <- "summary.mona" object } print.summary.mona <- function(x, ...) { print.mona(x, ...) invisible(x) } cluster/R/clara.q0000644000175100001440000001563312463511165013432 0ustar hornikusers#### CLARA := Clustering LARge Applications #### #### Note that the algorithm is O(n), but O(ns^2) where ns == sampsize clara <- function(x, k, metric = "euclidean", stand = FALSE, samples = 5, sampsize = min(n, 40 + 2 * k), trace = 0, medoids.x = TRUE, keep.data = medoids.x, rngR = FALSE, pamLike = FALSE) { ## check type of input matrix and values of input numbers if(inherits(x, "dist"))# catch user error stop("'x' is a \"dist\" object, but should be a data matrix or frame") x <- data.matrix(x) if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.") n <- nrow(x) if((k <- as.integer(k)) < 1 || k > n - 1) stop("The number of cluster should be at least 1 and at most n-1." 
) if((sampsize <- as.integer(sampsize)) < max(2,k+1)) stop(gettextf("'sampsize' should be at least %d = max(2, 1+ number of clusters)", max(2,k+1)), domain=NA) if(n < sampsize) stop(gettextf("'sampsize' = %d should not be larger than the number of objects, %d", sampsize, n), domain=NA) if((samples <- as.integer(samples)) < 1) stop("'samples' should be at least 1") jp <- ncol(x) namx <- dimnames(x)[[1]] ## standardize, if necessary {careful not to copy unnecessarily}: if(medoids.x) ## need to save original 'x' ox <- x else if(keep.data) stop("when 'medoids.x' is FALSE, 'keep.data' must be too") if(stand) x <- scale(x, scale = apply(x, 2, meanabsdev)) if(keep.data) data <- x ## put info about metric, size and NAs in arguments for the .C call if((mdata <- any(inax <- is.na(x)))) { # TRUE if x[] has any NAs jtmd <- integer(jp) jtmd[apply(inax, 2L, any)] <- -1L ## VALue for MISsing DATa valmisdat <- 1.1* max(abs(range(x, na.rm=TRUE))) x[inax] <- valmisdat } else rm(inax) # save space res <- .C(cl_clara, n, jp, k, ## 3 clu = as.double(x), samples, # = nran sampsize, # = nsam ## 6 dis = double(1 + (sampsize * (sampsize - 1))/2), as.integer(mdata), # = mdata valmd = if(mdata) rep(valmisdat, jp) else -1., ## 9 jtmd = if(mdata) jtmd else integer(1), as.integer(if(metric == "manhattan") 2 else 1), # = diss_kind as.logical(rngR[1]), # = rng_R ## 12 as.logical(pamLike[1]), # = pam_like integer(sampsize), # = nrepr integer(sampsize), # = nsel ## 15 sample= integer(sampsize),# = nbest integer(k), # = nr imed = integer(k), # = nrx ## 18 double(k), # = radus double(k), # = ttd double(k), # = ratt ## 21 avdis = double(k), # = ttbes maxdis = double(k), # = rdbes ratdis = double(k), # = rabes ## 24 size = integer(k), # = mtt obj = double(1), avsil = double(k), ## 27 ttsil = double(1), silinf = matrix(0, sampsize, 4), jstop = integer(1), ## 30 as.integer(trace), # = trace_lev double (3 * sampsize), # = tmp integer(6 * sampsize)) # = itmp ## 33 ## give a warning when errors occured ## 
res[] components really used below: ## jstop, clu, silinf, dis, sample, med, imed, obj, size, maxis, avdis, ratdis, ## avsil, ttsil if(res$jstop) { if(mdata && any(aNA <- apply(inax,1, all))) { i <- which(aNA) nNA <- length(i) pasteC <- function(...) paste(..., collapse= ",") if(nNA < 13) stop(sprintf(ngettext(nNA, "Observation %s has *only* NAs --> omit it for clustering", "Observations %s have *only* NAs --> omit them for clustering!"), pasteC(i)), domain = NA) else stop(sprintf(ngettext(nNA, "%d observation (%s) has *only* NAs --> omit them for clustering!", "%d observations (%s ...) have *only* NAs --> omit them for clustering!"), nNA, pasteC(i[1:12])), domain = NA) } ## else if(res$jstop == 1) stop("Each of the random samples contains objects between which no distance can be computed.") if(res$jstop == 2) stop(gettextf("For each of the %d samples, at least one object was found which could not be assigned to a cluster (because of missing values).", samples)) ## else {cannot happen} stop("invalid 'jstop' from .C(cl_clara,.): ", res$jstop) } ## 'res$clu' is still large; cut down ASAP res$clu <- as.integer(res$clu[1:n]) sildim <- res$silinf[, 4] ## adapt C output to S: ## convert lower matrix, read by rows, to upper matrix, read by rows. 
disv <- res$dis[-1] disv[disv == -1] <- NA disv <- disv[upper.to.lower.tri.inds(sampsize)] class(disv) <- dissiCl attr(disv, "Size") <- sampsize attr(disv, "Metric") <- metric attr(disv, "Labels") <- namx[res$sample] res$med <- if(medoids.x) ox[res$imed, , drop = FALSE] ## add labels to C output if(!is.null(namx)) { sildim <- namx[sildim] res$sample <- namx[res$sample] names(res$clu) <- namx } r <- list(sample = res$sample, medoids = res$med, i.med = res$imed, clustering = res$clu, objective = res$obj, clusinfo = cbind(size = res$size, "max_diss" = res$maxdis, "av_diss" = res$avdis, isolation = res$ratdis), diss = disv, call = match.call()) ## add dimnames to C output if(k > 1) { dimnames(res$silinf) <- list(sildim, c("cluster", "neighbor", "sil_width", "")) r$silinfo <- list(widths = res$silinf[, -4], clus.avg.widths = res$avsil, avg.width = res$ttsil) } if(keep.data) r$data <- data class(r) <- c("clara", "partition") r } print.clara <- function(x, ...) { cat("Call: ", deparse(x$call), "\nMedoids:\n"); print(x$medoids, ...) cat("Objective function:\t ", format(x$objective, ...),"\n", "Clustering vector: \t", sep=""); str(x$clustering, vec.len = 7) cat("Cluster sizes: \t", x$clusinfo[,1], "\nBest sample:\n"); print(x$sample, quote = FALSE, ...) cat("\nAvailable components:\n"); print(names(x), ...) invisible(x) } summary.clara <- function(object, ...) { class(object) <- "summary.clara" object } print.summary.clara <- function(x, ...) { cat("Object of class 'clara' from call:\n", deparse(x$call), "\nMedoids:\n"); print(x$medoids, ...) cat("Objective function:\t ", format(x$objective, ...), "\nNumerical information per cluster:\n") print(x$clusinfo, ...) if(has.sil <- !is.null(x$silinfo)) { cat("Average silhouette width per cluster:\n") print(x$silinfo[[2]], ...) cat("Average silhouette width of best sample:", format(x$silinfo[[3]], ...), "\n") } cat("\nBest sample:\n"); print(x$sample, quote = FALSE, ...) cat("Clustering vector:\n"); print(x$clustering, ...) 
if(has.sil) { cat("\nSilhouette plot information for best sample:\n") print(x$silinfo[[1]], ...) } if(!is.null(x$diss)) { ## Dissimilarities: cat("\n"); print(summary(x$diss, ...)) } cat("\nAvailable components:\n"); print(names(x), ...) invisible(x) } cluster/R/silhouette.R0000644000175100001440000002137412446370046014477 0ustar hornikuserssilhouette <- function(x, ...) UseMethod("silhouette") ## Accessor and more: silhouette.partition <- function(x, ...) { r <- x$silinfo$widths if(is.null(r)) stop("invalid partition object") attr(r, "Ordered") <- TRUE # (cluster , s.i ) attr(r, "call") <- x$call class(r) <- "silhouette" r } silhouette.clara <- function(x, full = FALSE, ...) { if(!full) return(NextMethod()) ##-> silh*.partition() ## else : full = TRUE if(is.null(x$data)) stop("full silhouette is only available for results of 'clara(*, keep.data = TRUE)'") ## Compute "full" silhouette -- from clustering + full distances: r <- silhouette(x$clustering, daisy(x$data, metric = attr(x, "Metric"))) attr(r, "call") <- substitute(silhouette(CL, full = TRUE), list(CL = x$call)) r } ## R-only implementation -- no longer used nor exported: silhouette.default.R <- function(x, dist, dmatrix, ...) { cll <- match.call() if(is.list(x) && !is.null(cl <- x$clustering)) x <- cl n <- length(x) if(!all(x == round(x))) stop("'x' must only have integer codes") k <- length(clid <- sort(unique(x))) if(k <= 1 || k >= n) return(NA) ## check dist/dmatrix if(missing(dist)) { if(missing(dmatrix)) stop("Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'") if(is.null(dm <- dim(dmatrix)) || length(dm) != 2 || !all(n == dm)) stop("'dmatrix' is not a dissimilarity matrix compatible to 'x'") } else { # 'dist' dist <- as.dist(dist) # hopefully if(n != attr(dist, "Size")) stop("clustering 'x' and dissimilarity 'dist' are incompatible") dmatrix <- as.matrix(dist)# so we can apply(.) 
below } wds <- matrix(NA, n,3, dimnames = list(names(x), c("cluster","neighbor","sil_width"))) for(j in 1:k) { # j-th cluster: Nj <- sum(iC <- x == clid[j]) wds[iC, "cluster"] <- clid[j] ## minimal distances to points in all other clusters: diC <- rbind(apply(dmatrix[!iC, iC, drop = FALSE], 2, function(r) tapply(r, x[!iC], mean)))# (k-1) x Nj ## max.col() breaks ties at random; rather do not want random ## behavior of silhouette, (but rather "pam" compatible one): minC <- apply(diC, 2, which.min) ## FIXME minC <- max.col(-t(diC)) ## FIXME : extend max.col(*, ties.method = "min") {or similar} ! wds[iC,"neighbor"] <- clid[-j][minC] s.i <- if(Nj > 1) { a.i <- colSums(dmatrix[iC, iC])/(Nj - 1) # length(a.i)= Nj b.i <- diC[cbind(minC, seq(along = minC))] ifelse(a.i != b.i, (b.i - a.i) / pmax(b.i, a.i), 0) } else 0 wds[iC,"sil_width"] <- s.i } attr(wds, "Ordered") <- FALSE attr(wds, "call") <- cll class(wds) <- "silhouette" wds } ## silhouette.default.R silhouette.default <- function(x, dist, dmatrix, ...) 
{ cll <- match.call() if(is.list(x) && !is.null(cl <- x$clustering)) x <- cl n <- length(x) if(!all(x == round(x))) stop("'x' must only have integer codes") k <- length(ux <- unique(x <- as.integer(x))) if(k <= 1 || k >= n) # silhouette undefined for trivial clusterings return(NA) doRecode <- (any(ux < 1) || any(ux > k)) ## need to recode if(doRecode) x <- as.integer(fx <- factor(x)) # now *has* values in 1:k ## check dist/dmatrix has.dmatrix <- missing(dist) if(has.dmatrix) { if(missing(dmatrix)) stop("Need either a dissimilarity 'dist' or diss.matrix 'dmatrix'") if(is.null(dm <- dim(dmatrix)) || length(dm) != 2 || !all(n == dm)) stop("'dmatrix' is not a dissimilarity matrix compatible to 'x'") } else { # 'dist' dist <- as.dist(dist) # hopefully if(n != attr(dist, "Size")) stop("clustering 'x' and dissimilarity 'dist' are incompatible") } out <- .C(sildist, d = as.numeric(if(has.dmatrix) dmatrix else dist), as.integer(n), x, as.integer(k), diC = numeric(n*k), counts = integer(k), si = numeric(n), neighbor = integer(n), ismat = has.dmatrix)[c("si", "neighbor")] if(doRecode) { code.x <- as.integer(levels(fx)) x <- code.x[x] } wds <- cbind(cluster = x, neighbor = if(doRecode) code.x[out$neighbor] else out$neighbor, "sil_width" = out$si) if(doRecode) attr(wds, "codes") <- code.x attr(wds, "Ordered") <- FALSE attr(wds, "call") <- cll class(wds) <- "silhouette" wds } sortSilhouette <- function(object, ...) 
{ if(is.null(n <- nrow(object)) || n < 1) stop("invalid silhouette structure") if(attr(object,"Ordered")) { if(is.null(attr(object, "iOrd"))) attr(object, "iOrd") <- 1:n return(object) } ## Else : if(is.null(rownames(object))) rownames(object) <- as.character(1:n) ## k <- length(clid <- sort(unique(cl <- object[,"cluster"])))# cluster ID s cl <- object[,"cluster"] r <- object[iOrd <- order(cl, - object[,"sil_width"]) , , drop = FALSE] ## r has lost attributes of object; restore them, but do *not* ## change dimnames: nms <- names(at <- attributes(object)) for(n in nms[!(nms %in% c("dim","dimnames","iOrd","Ordered"))]) attr(r, n) <- at[[n]] attr(r,"iOrd") <- iOrd # the ordering attr(r,"Ordered") <- TRUE r } summary.silhouette <- function(object, FUN = mean, ...) { if(ncol(object) != 3) stop("invalid 'silhouette' object") cl <- object[, "cluster"] si <- object[, "sil_width"] r <- list(si.summary = summary(si, ...), clus.avg.widths = tapply(si, cl, FUN), clus.sizes = table(cl), avg.width = FUN(si), call = attr(object,"call"), codes = attr(object,"codes"), Ordered = attr(object,"Ordered")) class(r) <- "summary.silhouette" r } print.summary.silhouette <- function(x, ...) { k <- length(csiz <- x$clus.sizes) cls <- paste("Cluster sizes", if(!is.null(x$codes)) paste(", ids = (",paste(x$codes, collapse=", "),"),", sep=""), sep="") cat("Silhouette of", sum(csiz), "units in", k, "clusters", if(!is.null(x$call)) paste("from", deparse(x$call)), ":\n", cls, "and average silhouette widths:\n") cwid <- x$clus.avg.widths names(cwid) <- csiz print(cwid, ...) cat("Individual silhouette widths:\n") print(x$si.summary, ...) invisible(x) } ## This was the internal function silhouPlot() in plot.partition() : plot.silhouette <- function(x, nmax.lab = 40, max.strlen = 5, main = NULL, sub = NULL, xlab = expression("Silhouette width " * s[i]), col = "gray", do.col.sort = length(col) > 1, border = 0, cex.names = par("cex.axis"), do.n.k = TRUE, do.clus.stat = TRUE, ...) 
{ if(!is.matrix(x) || ncol(x) != 3) stop("No valid silhouette information (#{clusters} =? 1)") n <- nrow(x) x <- sortSilhouette(x) s <- rev(x[, "sil_width"]) space <- c(0, rev(diff(cli <- x[, "cluster"]))) space[space != 0] <- 0.5 # gap between clusters axisnames <- (n < nmax.lab) if(axisnames) names <- substring(rev(rownames(x)), 1, max.strlen) if(is.null(main)) { main <- "Silhouette plot" if(!is.null(cll <- attr(x,"call"))) { # drop initial "silhouette": if(!is.na(charmatch("silhouette", deparse(cll[[1]])))) cll[[1]] <- as.name("FF") main <- paste(main, "of", sub("^FF","", deparse(cll))) } } smry <- summary(x) k <- length(nj <- smry$clus.sizes) # k clusters if(is.null(sub)) sub <- paste("Average silhouette width : ", round(smry$avg.width, digits = 2)) if(do.col.sort && (lc <- length(col)) > 1) { if(lc == k)# cluster wise coloring col <- col[cli] else ## unit wise coloring if(lc != n) col <- rep(col, length = n) col <- rev(col) # was rev(col[attr(x, "iOrd")]) } y <- barplot(s, space = space, names = names, xlab = xlab, xlim = c(min(0, min(s)), 1), horiz = TRUE, las = 1, mgp = c(2.5, 1, 0), col = col, border = border, cex.names = cex.names, axisnames = axisnames, ...) title(main = main, sub = sub, adj = 0) if(do.n.k) { mtext(paste("n =", n), adj = 0) mtext(substitute(k ~~ "clusters" ~~ C[j], list(k=k)), adj= 1) } if(do.clus.stat) { mtext(expression(paste(j," : ", n[j]," | ", ave[i %in% Cj] ~~ s[i])), adj = 1.04, line = -1.2) y <- rev(y) hasCodes <- !is.null(cx <- attr(x,"codes")) for(j in 1:k) { j. 
<- if(hasCodes) cx[j] else j yj <- mean(y[cli == j.]) text(1, yj, paste(j.,": ", nj[j]," | ", format(smry$clus.avg.widths[j], digits = 1, nsmall = 2)), xpd = NA, adj = 0.8) } } } cluster/R/agnes.q0000644000175100001440000001445712540507660013451 0ustar hornikusers#### $Id: agnes.q 6953 2015-06-18 09:30:24Z maechler $ agnes <- function(x, diss = inherits(x, "dist"), metric = "euclidean", stand = FALSE, method = "average", par.method, keep.diss = n < 100, keep.data = !diss, trace.lev = 0) { METHODS <- c("average", "single","complete", "ward","weighted", "flexible", "gaverage") ## hclust has more; 1 2 3 4 5 6 7 meth <- pmatch(method, METHODS) if(is.na(meth)) stop("invalid clustering method") if(meth == -1) stop("ambiguous clustering method") cl. <- match.call() method <- METHODS[meth] if(method == "flexible") { ## Lance-Williams formula (but *constant* coefficients): stopifnot((np <- length(a <- as.numeric(par.method))) >= 1) attr(method,"par") <- par.method <- if(np == 1)## default (a1= a, a2= a, b= 1-2a, c = 0) c(a, a, 1-2*a, 0) else if(np == 3) c(a, 0) else if(np == 4) a else stop("'par.method' must be of length 1, 3, or 4") ## if(any(par.method[1:2]) < 0) ## warning("method \"flexible\": alpha_1 or alpha_2 < 0 can give invalid dendrograms" } else if (method == "gaverage") { attr(method,"par") <- par.method <- if (missing(par.method)) { ## Default par.method: Using beta = -0.1 as advised in Belbin et al. 
(1992) beta <- -0.1 c(1-beta, 1-beta, beta, 0) } else { stopifnot((np <- length(b <- as.numeric(par.method))) >= 1) if(np == 1)## default (a1= 1-b, a2= 1-b, b= b, c= 0) c(1-b, 1-b, b, 0) else if(np == 3) c(b, 0) else if(np == 4) b else stop("'par.method' must be of length 1, 3, or 4") } ## if(any(par.method[1:2]) < 0) ## warning("method \"gaverage\": alpha_1 or alpha_2 < 0 can give invalid dendrograms" } else ## dummy (passed to C) par.method <- double() if((diss <- as.logical(diss))) { ## check type of input vector if(anyNA(x)) stop("NA-values in the dissimilarity matrix not allowed.") if(data.class(x) != "dissimilarity") { # try to convert to if(!is.null(dim(x))) { x <- as.dist(x) # or give an error } else { ## possibly convert input *vector* if(!is.numeric(x) || is.na(n <- sizeDiss(x))) stop("'x' is not and cannot be converted to class \"dissimilarity\"") attr(x, "Size") <- n } class(x) <- dissiCl if(is.null(attr(x,"Metric"))) attr(x, "Metric") <- "unspecified" } n <- attr(x, "Size") dv <- x[lower.to.upper.tri.inds(n)] ## prepare arguments for the Fortran call dv <- c(0., dv)# "double", 1st elem. "only for Fortran" (?) 
jp <- 1L mdata <- FALSE ndyst <- 0 x2 <- double(1) } else { ## check input matrix and standardize, if necessary x <- data.matrix(x) if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.") x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x storage.mode(x2) <- "double" ndyst <- if(metric == "manhattan") 2 else 1 n <- nrow(x2) jp <- ncol(x2) if((mdata <- any(inax <- is.na(x2)))) { # TRUE if x[] has any NAs jtmd <- integer(jp) jtmd[apply(inax, 2L, any)] <- -1L ## VALue for MISsing DATa valmisdat <- 1.1* max(abs(range(x2, na.rm=TRUE))) x2[inax] <- valmisdat } dv <- double(1 + (n * (n - 1))/2) } if(n <= 1) stop("need at least 2 objects to cluster") stopifnot(length(trace.lev <- as.integer(trace.lev)) == 1) C.keep.diss <- keep.diss && !diss res <- .C(twins, as.integer(n), as.integer(jp), x2, dv, dis = double(if(C.keep.diss) length(dv) else 1), jdyss = if(C.keep.diss) diss + 10L else as.integer(diss), if(mdata) rep(valmisdat, jp) else double(1), if(mdata) jtmd else integer(jp), as.integer(ndyst), 1L,# jalg = 1 <==> AGNES meth,# integer integer(n), ner = integer(n), ban = double(n), ac = double(1), par.method, merge = matrix(0L, n - 1, 2), # integer trace = trace.lev) if(!diss) { ##give warning if some dissimilarities are missing. if(res$jdyss == -1) stop("No clustering performed, NA-values in the dissimilarity matrix.\n" ) if(keep.diss) { ## adapt Fortran output to S: ## convert lower matrix,read by rows, to upper matrix, read by rows. 
disv <- res$dis[-1] disv[disv == -1] <- NA disv <- disv[upper.to.lower.tri.inds(n)] class(disv) <- dissiCl attr(disv, "Size") <- nrow(x) attr(disv, "Metric") <- metric attr(disv, "Labels") <- dimnames(x)[[1]] } ##add labels to Fortran output if(length(dimnames(x)[[1]]) != 0) order.lab <- dimnames(x)[[1]][res$ner] } else { if(keep.diss) disv <- x ##add labels to Fortran output if(length(attr(x, "Labels")) != 0) order.lab <- attr(x, "Labels")[res$ner] } clustering <- list(order = res$ner, height = res$ban[-1], ac = res$ac, merge = res$merge, diss = if(keep.diss)disv, call = cl., method = METHODS[meth]) if(exists("order.lab")) clustering$order.lab <- order.lab if(keep.data && !diss) { if(mdata) x2[x2 == valmisdat] <- NA clustering$data <- x2 } class(clustering) <- c("agnes", "twins") clustering } summary.agnes <- function(object, ...) { class(object) <- "summary.agnes" object } print.agnes <- function(x, ...) { cat("Call: ", deparse(x$call), "\nAgglomerative coefficient: ", format(x$ac, ...), "\nOrder of objects:\n") print(if(length(x$order.lab) != 0) x$order.lab else x$order, quote = FALSE, ...) cat("Height (summary):\n"); print(summary(x$height), ...) cat("\nAvailable components:\n"); print(names(x), ...) invisible(x) } print.summary.agnes <- function(x, ...) { ## a bit more than print.agnes() .. cat("Object of class 'agnes' from call:\n", deparse(x$call), "\nAgglomerative coefficient: ", format(x$ac, ...), "\nOrder of objects:\n") print(if(length(x$order.lab) != 0) x$order.lab else x$order, quote = FALSE, ...) cat("Merge:\n"); print(x$merge, ...) cat("Height:\n"); print(x$height, ...) if(!is.null(x$diss)) { ## Dissimilarities: cat("\n"); print(summary(x$diss, ...)) } cat("\nAvailable components:\n"); print(names(x), ...) invisible(x) } as.dendrogram.twins <- function(object, ...) ## ... : really only 'hang' as.dendrogram(as.hclust(object), ...) 
cluster/R/zzz.R0000644000175100001440000000021211407171057013130 0ustar hornikusers.onUnload <- function(libpath) { library.dynam.unload("cluster", libpath) } ## no S4 methodology here; speedup : .noGenerics <- TRUE cluster/MD50000644000175100001440000001610512553357106012273 0ustar hornikuserse2ac57fa1baf6e59d4198c05eea5403a *ChangeLog 747b831940ca1896d407445f918839dc *DESCRIPTION 824c6b9820b7fc963b52b185b8284481 *INDEX 1e7f8aeba5a46fb513e3eeea883df709 *NAMESPACE 8de82c7d42bd4a27c27c25462ae2a307 *PORTING 7ed39d909d775dbc3490c8a544661deb *R/0aaa.R 9a36ddc2124718c79ca517ff09b7cdc0 *R/agnes.q 6d2080410335325eaf85f28171321177 *R/clara.q 70fff0c35d52283501f2b6e66d938b09 *R/clusGap.R e93355a6f7ad8d8486e6db4db52200a9 *R/coef.R a31431660af05e1053a37801b11800df *R/daisy.q 6af9ad032a747db5b062426c5f10dc30 *R/diana.q 00e9f508a139668d7a24ef7ca59696ae *R/ellipsoidhull.R 86a134c01b58fae094b8b064c4af3d60 *R/fanny.q ff1cec6103f81407f16981ffd44d1fd5 *R/internal.R b06ce16d80187d3b1e7ae9c248a7b202 *R/mona.q 11e2b4ba0c81427a28f9238cdd749546 *R/pam.q 4202616d1b9da174d3870a9866a467b9 *R/plothier.q 08c3b07c34462f8dfbb50669ee7eb4f0 *R/plotpart.q ac6ea20501b14230d4f2fcf97abda367 *R/silhouette.R f1d53c5f24897b5ab0e6014be9be7844 *R/zzz.R 1f5615783dc47c80ed105754db24330a *README ac189f8e6e1314c01a700f6a31ee4506 *data/agriculture.tab 1046f7a750dbbac2724fd910aff561e7 *data/animals.tab c8f60e65b0356698f6b08c23c07e2a94 *data/chorSub.rda 0f34ac1e10d178fa04a4babb705d5b27 *data/flower.R 0772c2f9932873c7dd6551208cd0725b *data/plantTraits.rda 1022570c32295066506ad7b26439f7bf *data/pluton.tab eda904e011e79ea5a25602125d558d04 *data/ruspini.tab e816e6c67444107ab47ff3bf2f6023f2 *data/votes.repub.tab be46f45035b177b940393db459789282 *data/xclara.rda 2d1d8b81f92e1e97b282390e90941497 *inst/CITATION 4fd58df611fbd16f0a9732b538168e4d *inst/NEWS.Rd f8f116d623c12dbe5de73d26a22752d0 *inst/po/de/LC_MESSAGES/R-cluster.mo f9a2d92cf3a6860ea049c075770a3948 *inst/po/de/LC_MESSAGES/cluster.mo 
cd00e93401afd775bd38ecb2199d0489 *inst/po/en@quot/LC_MESSAGES/R-cluster.mo 9848d015a85f4fb5c6b14f923e4837b5 *inst/po/en@quot/LC_MESSAGES/cluster.mo 1f712f272f063873900d80c700fe13bb *inst/po/fr/LC_MESSAGES/R-cluster.mo 29be8a0b5b76d45fc4be564be520e3ab *inst/po/ko/LC_MESSAGES/R-cluster.mo ca271a7cea8776101e346c51a2dc0459 *inst/po/ko/LC_MESSAGES/cluster.mo 792bf7e6a512e2f0c6abcd541f4b2981 *inst/po/pl/LC_MESSAGES/R-cluster.mo 4c1bacfefc5732e1747d2e7a8889618f *man/agnes.Rd 29d26f3fb10f3462291c10d441865e71 *man/agnes.object.Rd 7db03338761019b70d064ffe1eddcc5d *man/agriculture.Rd f72a36d67c7c7b58399dbcaf473acdd9 *man/animals.Rd 80586a34dc1e14f840ebae455aeb9736 *man/bannerplot.Rd 0b1033484c0b66ff9451427930e92987 *man/chorSub.Rd f4e46ab4ab26d4683f8a171e77789f55 *man/clara.Rd b86f299b6994b98e2112843516f3108a *man/clara.object.Rd c399419a8cdfdb7ba75f760f44423125 *man/clusGap.Rd 40a5989adc708a30f395ef420d43788c *man/clusplot.default.Rd ea3ea6469c8f57eafa1229f82b78c30c *man/clusplot.partition.Rd c7341c96f49e5b288448c4cb9436c2fa *man/cluster-internal.Rd 20b35f88ced8e679778892a732a43369 *man/coef.hclust.Rd 2bf0822b886ae7b5b8ae715f95b154ce *man/daisy.Rd ee5e0c36deba584f30ab869222a229df *man/diana.Rd aa9c2fe350e02eb23f211a44e40c8a90 *man/dissimilarity.object.Rd f2ca47a9ae28aa3fb9c74226e682d4a3 *man/ellipsoidhull.Rd c9c46e3cb33abe97c38e319c256496f9 *man/fanny.Rd 7d549aed091402cecc8a398085e4bb86 *man/fanny.object.Rd 94bfe5845b4efa6bffec6c455081a237 *man/flower.Rd f9c1ca445301e6c2ed69986d96ab5676 *man/lower.to.upper.tri.inds.Rd 23f8d574ca25378bcc7a261d61aa9fcf *man/mona.Rd 546379a2e048bf7ef7a69aff87f4ca50 *man/mona.object.Rd e585377195534f78a94fb72d32f972ca *man/pam.Rd 21795cc8b4bd9b63b24f44e5ffeeccb2 *man/pam.object.Rd cafab0f87fd2443f14e672d325243990 *man/partition.object.Rd 40fe00964d4215ce1b2390e8f18e9dc0 *man/plantTraits.Rd 60b5c55053eca83e5d86a3add0bdf460 *man/plot.agnes.Rd e7b16368335969b777ccebde36b578a8 *man/plot.diana.Rd 936341a14d58811b41e45b09fd8b37bb *man/plot.mona.Rd 
ee6a690d0f2c7a25f3b8f77881778137 *man/plot.partition.Rd 9b7b312a6d216468cf9bf3c90ab349b9 *man/pltree.Rd 84b2723e904c2b1897a00043106b458e *man/pluton.Rd 8b34c88f90c91ce710b2ccc65e91a486 *man/predict.ellipsoid.Rd d74abf0fc1c013f696f0b8ddd724b094 *man/print.agnes.Rd b6384eb685030609ae9edd06434949b0 *man/print.clara.Rd e0c63f114cc0bf71fe53964b5f883255 *man/print.diana.Rd b32046c766441c7fc75f644734c690b1 *man/print.dissimilarity.Rd 1ce3568140412485f727b2d9193ba09c *man/print.fanny.Rd 0dcf3dd3c5afbb216939ca9158c32192 *man/print.mona.Rd b1c1625935a5c22d81aa2e73d3b80457 *man/print.pam.Rd 7cd999938af26fb5c97e4d45ab9ea982 *man/ruspini.Rd 0a8d0bf239f04ce75906bdc86022d4cb *man/silhouette.Rd 8beea8b2090f7e91d0a0b69ec2013718 *man/sizeDiss.Rd 806f75ba78b8e959f53fefad76a0dca6 *man/summary.agnes.Rd 1024351710e393761532b043ff37df7f *man/summary.clara.Rd 1536a3f848f81661af3d45c06697e072 *man/summary.diana.Rd 423bde4e918538a7f6e0b1d4aa0e7d6f *man/summary.mona.Rd 5cc8d9a8fa53b437121d841475d46b46 *man/summary.pam.Rd 1f622b89b4b8b0e93e3f0abd65122ee4 *man/twins.object.Rd 69c2598048e0488ca72d0809d1d3214a *man/volume.ellipsoid.Rd 0510d0816550f6d3c258652912053a1d *man/votes.repub.Rd 438589447329748ecc79553dc159e458 *man/xclara.Rd 0dfc2c27c996bacbd92162328d4d766e *po/R-cluster.pot 1cb73d7c5cd45e11a7f86885c06035e4 *po/R-de.po e9b5293e63746638be1f3570dbeb4fe3 *po/R-en@quot.po dfecc6446b2f9de1c738f749c9ff113b *po/R-fr.po e255100c6d897687f22fce53f8fbe0e1 *po/R-ko.po ff29770ccb2b1ff78e2a28ecd720326d *po/R-pl.po 3c9ac6c4265443d311a197d968188426 *po/cluster.pot afd484b55aab3179e672e847eaf9dbef *po/de.po 1c3b0c6fd5b301dc611c5f5864d3efc3 *po/ko.po 86e308f92ccc269bcb494de5ac0b2481 *po/update-me.sh e4cd4582ad61fb9e8964b9947ea8228e *src/clara.c 4a676732af8a929c21183d6a949b2536 *src/cluster.h 4ae4b8fc0d4ba8a1a44bd9fc43e08626 *src/daisy.f 20241ba8aa6ae6296c5acec015f1f906 *src/dysta.f adb9138bb0f62e4b9cd6e962e9905185 *src/fanny.c 23cea00d2feab57a92e8c2393c7f4b8a *src/ind_2.h 7b58c3904ee2bc3e60b328059e36a125 
*src/init.c 77001343e1648a21ebe395a6d8561648 *src/mona.f b0f0aec70a384e7596d01187a87a90ad *src/pam.c 6f11dc74b1636e3ac848563d570f0030 *src/sildist.c f42f05132aaf001ddd333e3e109692e0 *src/spannel.c 2320f73cecf7f13c4de20cdca85fb894 *src/twins.c 8e2ad970e89446f89946d30d66a4876d *tests/agnes-ex.R 10be212d363567cc94158c42035bb4e4 *tests/agnes-ex.Rout.save 8b4f445da3ac72cff39d4a36b6250316 *tests/clara-NAs.R 6bd95003eefaf21f013f6e6ef4bb9c92 *tests/clara-NAs.Rout.save 2cbf024db5a82a258448f19ef752b061 *tests/clara-ex.R 2f15ef0bcce52548c1d134a59395dbaf *tests/clara.R beee63e77e09c0c45dc13e8d0bcd0da1 *tests/clara.Rout.save 79c68eadc1e1f72d909b7c015a278cc7 *tests/clusplot-out.R b583d20874ec3960ec1cc06d8ae1d447 *tests/clusplot-out.Rout.save 35fd3185d685723e528a435514b38604 *tests/daisy-ex.R 61459193985591197aadfff191f49940 *tests/daisy-ex.Rout.save 15d472538e911a5a8da7970ae6d889c3 *tests/diana-boots.R 4fc11382af41801e16128f96e17a70e7 *tests/diana-ex.R c10f01e8f1c0a008a92d674670db931a *tests/diana-ex.Rout.save bfd6bb81ad19948dc6e91d46ab5ff30f *tests/ellipsoid-ex.R 0fc615ac20f944d5f5bb974abe049c01 *tests/ellipsoid-ex.Rout.save 52b341bc06eb5692a73dec2be2cd7e5a *tests/fanny-ex.R 7ea9c35599c857c5393421da3570ea0a *tests/mona.R 794166e31834ecfb147e62843ad7931a *tests/mona.Rout.save d43133f1b110cd360fda4cb86b693d99 *tests/pam.R 44aeafcb0336f7a98b6500d9b7260ee9 *tests/pam.Rout.save 477cd7fd12117a6cbcdfc9d5944fbd39 *tests/silhouette-default.R 88f4c305ccf9133ec34c2fda0935942e *tests/silhouette-default.Rout.save d9cdce1776e344a6f4f2574cee6ef487 *tests/sweep-ex.R cluster/README0000644000175100001440000000264210307303426012633 0ustar hornikusersORIGINAL README : This directory contains code, help and examples for CLUS, an S-PLUS package for clustering, as described in ``Clustering in an Object-Oriented Environment'' by Anja Struyf, Mia Hubert, and Peter J. Rousseeuw (Journal of Statistical Software, volume 1). 
------------------------------------------------------------------------ See http://www.stat.ucla.edu/journals/jss/ for the original version. The current port is based on material now on http://www.agoras.ua.ac.be/ KH 1998/05/21 --------- For historical reasons, we keep R/README-Splus which has no relevance to the R package. --------------------- TODO {see ./DONE-MM for things done} 3) daisy() for the case of mixed variables should allow a weight vector (of length p = #vars) for up- or downweighing variables. daisy() really should accept the other methods mva's dist() does _and_ it should use dist's C API -- but we have no C API for package code, ARRGH! 4) Eliminate the many Fortran (g77 -Wall) warnings of the form >> mona.f:101: warning: `jma' might be used uninitialized in this function 6) Mona objects describe a hierarchical clustering; they could also inherit from twins, and hence have a pltree() method for plotting the hierarchical tree. 8b) Think about "merging" the plot.agnes and plot.diana methods. ------------------ Martin , since 1999 cluster/DESCRIPTION0000644000175100001440000000307512553357106013473 0ustar hornikusersPackage: cluster Version: 2.0.3 Date: 2015-07-20 Priority: recommended Title: "Finding Groups in Data": Cluster Analysis Extended Rousseeuw et al. Description: Methods for Cluster analysis. Much extended the original from Peter Rousseeuw, Anja Struyf and Mia Hubert, based on Kaufman and Rousseeuw (1990) "Finding Groups in Data". 
Maintainer: Martin Maechler Authors@R: c(person("Martin","Maechler", role=c("aut","cre"), email="maechler@stat.math.ethz.ch") ,person("Peter", "Rousseeuw", role="aut", email="rousse@uia.ua.ac.be", comment = "Fortran original") ,person("Anja", "Struyf", role="aut", comment= "S original") ,person("Mia", "Hubert", role="aut", email= "Mia.Hubert@uia.ua.ac.be", comment= "S original") ,person("Kurt", "Hornik", role=c("trl", "ctb"), email="Kurt.Hornik@R-project.org", comment="port to R; maintenance(1999-2000)") ,person("Matthias", "Studer", role="ctb") ,person("Pierre", "Roudier", role="ctb") ) Depends: R (>= 2.15.0), utils Imports: graphics, grDevices, stats Suggests: MASS SuggestsNote: two small examples using MASS' cov.rob() and mvrnorm() LazyLoad: yes LazyData: yes ByteCompile: yes BuildResaveData: no License: GPL (>= 2) NeedsCompilation: yes Packaged: 2015-07-20 08:32:32 UTC; maechler Author: Martin Maechler [aut, cre], Peter Rousseeuw [aut] (Fortran original), Anja Struyf [aut] (S original), Mia Hubert [aut] (S original), Kurt Hornik [trl, ctb] (port to R; maintenance(1999-2000)), Matthias Studer [ctb], Pierre Roudier [ctb] Repository: CRAN Date/Publication: 2015-07-21 07:53:10 cluster/ChangeLog0000644000175100001440000014532412553131017013532 0ustar hornikusers2014-03-26 Martin Maechler * DESCRIPTION (Version): 1.15.2 * R/*.q updates to make messages translatable; proposed by Lukasz Daniel 2014-03-12 Martin Maechler * DESCRIPTION (Version): 1.15.1 * man/mona.Rd: anyNA2[cbind(, )] <- NA fails in R < 3.0.x 2013-11-06 Martin Maechler * DESCRIPTION (Version): 1.15.0; also using Authors@R * R/agnes.q (agnes): method "gaverage" contributed by Pierre Roudier. 
2013-03-26 Martin Maechler * DESCRIPTION (Version): 1.14.5, never released 2013-03-26 Martin Maechler * DESCRIPTION (Version): 1.14.4, released to CRAN, 2013-03-26 * po/R-de.po: trivial update from Detlef, 2012-02-06 Martin Maechler * DESCRIPTION (Version): 1.14.2, released to CRAN today * R/clusGap.R (maxSE): with 5 methods to compute "maximal gap", used also in print() method. 2012-01-21 Martin Maechler * R/clusGap.R (print.clusGap, plot.clusGap): added 2012-01-17 Martin Maechler * R/clusGap.R: new clusGap() for computing the cluster gap statistic. 2011-12-21 Martin Maechler * R/coef.R (coefHier): newly export coefHier(), interfacing to C's bncoef(). * tests/diana-boots.R (dianaBoot): add a "bootstrap", for considerably more tests. * tests/diana-ex.R: reorganized * src/twins.c: newly translated from Fortran, for more future flexibility * src/twins.f: no longer there * R/agnes.q (as.dendrogram.twins): define for 'twins' rather than just 'agnes'. 2011-12-03 Martin Maechler * R/plotpart.q (clusplot.default): xlab, and ylab arguments; new 's.x.2d' argument which allows to specify the plot setup in details. (clas.snijpunt): also check for NAs in x[1:2,m]. Suggestions originally from Peter Ruckdeschel 2011-10-16 Martin Maechler * po/: setup of for translation 2011-09-14 Martin Maechler * inst/CITATION: update according to state-of-the-art. 2011-08-29 Martin Maechler * DESCRIPTION (Version): 1.14.1, released to CRAN, 2011-10-16 * R/silhouette.R (plot.silhouette): col = had ordering bug. * man/silhouette.Rd: added example 2011-06-07 Martin Maechler * DESCRIPTION (Version): 1.14.0, released to CRAN, 2011-06-07 * R/plotpart.q (clusplot.default): use message() and warning() appropriately instead of just cat(). Further, add a 'add = FALSE' argument which allows *adding* ellipses to an existing plot. 2011-06-06 Martin Maechler * R/plotpart.q (clusplot.default): new 'cex' and 'cex.txt', the latter for the "point" labels. 
2011-05-20 Martin Maechler * src/pam.c (cl_pam): trace_lev: also trace the computation of dissimilarities (a very expensive part, for large n). * R/pam.q (pam): cluster.only=TRUE: if the *computed* dissimilarities have NA's the necessary error was not signalled. 2011-03-07 Martin Maechler * tests/*.Rout.save: update after R-devel's "scientific()" patch. * DESCRIPTION (Version): 1.13.4, *not* released 2011-02-21 Martin Maechler * DESCRIPTION (Version): 1.13.3, released to CRAN today 2010-12-12 Martin Maechler * R/ellipsoidhull.R, *: use "full argument names" 2010-11-09 Martin Maechler * R/clara.q (clara): enforce sampsize > k {as the help file always said}; allow to disable 'DUP=FALSE' via env variable. 2010-11-08 Martin Maechler * src/clara.c (cl_clara): inserting rand_k into nsel[]: simplified and avoiding (typically inconsequential) out_of_bound array access. 2010-10-21 Martin Maechler * DESCRIPTION (Version): 1.13.2 ; Enhances: MASS; released to CRAN, 2010-11-10 * src/cluster.h, src/daisy.f, src/mona.f: * src/init.c (FortEntries): s/cl_/cl/ for now, as some f77/g77 setups cannot deal with it correctly. 2010-06-24 Martin Maechler * DESCRIPTION (Version): 1.13.1 2010-06-23 Martin Maechler * man/daisy.Rd: add our *generalized* Gower formula (LaTeX), and update the detailled description accordingly. 2010-06-02 Martin Maechler * DESCRIPTION (Version): 1.13.0 * src/daisy.f (cl_daisy): renamed from daisy * src/init.c, src/*.[chf]: renamed others, so we can use * NAMESPACE (useDynLib): .registration = TRUE 2010-05-28 Martin Maechler * R/daisy.q (daisy): using variable weights (for gower). * src/daisy.f: ditto 2010-03-29 Martin Maechler * DESCRIPTION (Version): 1.12.3 * R/plothier.q (bannerplot): do not use formals(pretty) 2009-12-05 Martin Maechler * man/daisy.Rd: fix page number for Gower (1971) reference. 2009-10-05 Martin Maechler * DESCRIPTION (Version): 1.12.1 * man/ellipsoidhull.Rd: fix missing \link ((parts former 'graphics' have long been moved to 'grDevices'. 
2009-05-13 Martin Maechler * DESCRIPTION (Version): 1.12.0 -- released to CRAN * R/silhouette.R (silhouette.default, print.summary.*, plot.*): silhouette(x, *) and its methods now also work when x uses integer codes different than 1:k. Previously, this could seg.fault. * tests/silhouette-default.R: test it. 2009-03-12 Martin Maechler * man/daisy.Rd: fix typo/thinko (thanks to Felix Mueller). 2009-01-21 Martin Maechler * R/agnes.q (as.dendrogram.agnes): new utility * NAMESPACE: exported 2009-01-17 Martin Maechler * man/plot.agnes.Rd: link to pltree.twins() directly * R/agnes.q (print.summary.agnes): output 'agnes' instead of `agnes' 2009-01-07 Martin Maechler * DESCRIPTION (Version, Date): 1.11.12 and CRAN-released 2009-01-05 Martin Maechler * man/ellipsoidhull.Rd: fix for more stringent Rd parsing 2008-09-16 Martin Maechler * R/ellipsoidhull.R (print.ellipsoid): and others: replace backquotes ( ` ) by regular quotes. 2008-06-15 Martin Maechler * R/silhouette.R (sortSilhouette): do *not* overwrite the correctly reordered rownames: Fixes very long standing bug which lead to wrong observation labels in silhouette plots. 2008-04-10 Martin Maechler * R/zzz.R (.onUnload): add ..dynam.unload()ing 2008-02-29 Martin Maechler * DESCRIPTION (Date, Version): 1.11.10 and release 2007-12-19 Martin Maechler * src/pam.c (cstat, dark): move variable declarations to local context. (bswap): add a few R_CheckUserInterrupt() calls * R/pam.q (pam): new argument 'do.swap = TRUE'. Setting it to FALSE allows to skip the more expensive "SWAP" phase. * man/pam.Rd: (ditto) 2007-12-18 Martin Maechler * src/ind_2.h (ind_2): for m := max(i,j) > 46342, (m-2)*(m-1) give integer overflow which leads to a seg.fault. Now fixed. * src/pam.c (bswap): slightly improve trace output. * tests/pam.R, tests/pam.Rout.save: corresponding slight adaptions. * man/pam.Rd: "large" and * man/clara.Rd: "small" datasets; pam() works with thousands ... * R/clara.q (clara): error out when users try clara(, ...). 
2007-09-28 Martin Maechler * DESCRIPTION (Date, Version): 1.11.9 * man/pam.Rd: in invisible example, do not use partially matched [[. 2007-09-04 Martin Maechler * src/mona.f (mona): dummy initializations for "-Wall" * src/fanny.c (fygur): dummy init 'lang' 2007-08-23 Martin Maechler * src/daisy.f et al: de-tabify the fortran sources 2007-06-05 Martin Maechler * DESCRIPTION (Version): 1.11.7 (mainly for R-devel) 2007-06-02 Martin Maechler * man/plot.diana.Rd, etc: s/dendrogramm/dendrogram/ 2007-05-31 Martin Maechler * man/plantTraits.Rd: add \encoding{latin1} 2007-04-09 Martin Maechler * DESCRIPTION (Version): 1.11.6 - needed for R version >= 2.6.0. * R/daisy.q (daisy): for 2-valued variables only warn about non-specified type when n >= 10. 2007-04-07 Martin Maechler * R/diana.q (diana): don't use as.double(.) and DUP=FALSE ! 2007-03-30 Martin Maechler * man/diana.Rd: merge man/diana.object.Rd into *one* help file * DESCRIPTION (Version): 1.11.5 - for R 2.5.0 2007-03-29 Martin Maechler * R/silhouette.R (silhouette.default): use x$clustering only when x is a list. 2006-12-11 Martin Maechler * DESCRIPTION (Version): 1.11.4 -- ready * tests/to-25x, tests/to-24x: different outputs for R 2.5.0 (unstable) * tests/clara.Rout.save-R25x,..: and R-2.4.x * tests/silhouette-default.R: use [as.integer(rownames(.)), ] since we cannot guarantee rownames there. 2006-12-05 Martin Maechler * src/fanny.c (fuzzy): 14 instead of 15 sign.digits for trace_lev >= 2 printing. * R/clara.q (stop): message for NA-observations: only show first dozen of observation numbers. 
* tests/clara-NAs.R: test the above * tests/clara-NAs.Rout.save: ditto 2006-12-02 Martin Maechler * DESCRIPTION (Version): 1.11.3 -- released to CRAN * R/daisy.q (daisy): finish metric "gower", and test an example 2006-12-01 Martin Maechler * man/daisy.Rd: more content about Gower; thanks to Gavin Simpson 2006-11-23 Martin Maechler * R/daisy.q (daisy): metric = "gower" -- not yet finished 2006-09-07 Martin Maechler * DESCRIPTION (Version): 1.11.2 released * src/init.c: register dysta3 as C, not Fortran; drop trailing "_" ==> need to adapt these: * src/fanny.c: * src/cluster.h: * tests/dysta-ex.R: * tests/dysta-ex.Rout.save: 2006-08-24 Martin Maechler * DESCRIPTION (Version): 1.11.1 released to CRAN * R/silhouette.R (sortSilhouette): make sure "iOrd" is there also if "Ordered" is TRUE. (fixes plotting w/ col = 'clusterwise'.) 2006-08-23 Martin Maechler * tests/mona.R: update (slightly extend) regression test * tests/mona.Rout.save: consequence * R/mona.q (mona): replace apply(, 2, factor) with safer code, to work correctly in R 2.4.0 and later (thanks to Brian). 2006-06-12 Martin Maechler * R/plotpart.q (clusplot.default): new argument 'sub'; default is unchanged, but you can suppress the subtitle. 2006-05-17 Martin Maechler * src/fanny.c (fygur): a little more cleanup -- for compatibility with dark() in src/pam.c * DESCRIPTION (Version): 1.11.0, to be released to CRAN 2006-05-03 Martin Maechler * DESCRIPTION (Depends): require R >= 2.2.1 -- which should have happened earlier, since registration of Fortran symbols failed before. 2006-04-18 Martin Maechler * man/plantTraits.Rd: new dataset with many variables, including all kinds of factors, provided by Jeanne Vallet. * data/plantTraits.rda: useful for daisy() illustrations 2006-04-12 Martin Maechler * R/fanny.q (fanny): several new options: metric = "SqEuclidean", iniMem.p: initial membership matrix can be specified; cluster.only, keep.diss, keep.data: for saving memory/speed. warning when k.crisp < k. 
printing of $membership is now in rounded percent * man/fanny.Rd: document the above * tests/fanny-ex.R: much extended 2006-04-11 Martin Maechler * man/chorSub.Rd: new dataset; * data/chorSub.Rd: useful for fanny() illustrations * R/fanny.q (fanny): Dunn's partition coefficient (and its normalization) now always use sum.. u.. ^ 2 (and not "^ memb.exp"). option 'trace.lev' for iteration monitoring * src/fanny.c (fuzzy): (dito) 2006-04-08 Martin Maechler * src/fanny.c: Translated from Fortran (fanny.f); potentially with more options, etc. 2006-03-20 Martin Maechler * DESCRIPTION (Version): 1.10.5 for CRAN * .rsync-exclude: new file useful since I moved my RCS archive to SVN (using 'cvs2svn' and a perl script on 'svn-stat'). 2006-02-27 Martin Maechler * src/sildist.c (sildist): 2nd round (by Romain and me) 2006-02-25 Martin Maechler * R/silhouette.R (silhouette.default): new C version in file * src/sildist.c (sildist): provided by Romain Francois 2006-02-04 Martin Maechler * R/ellipsoidhull.R (print.ellipsoid): also work without 'conv' slot * man/ellipsoidhull.Rd: finally explain 'd2'; and \link to other ellipse implementations. * man/volume.ellipsoid.Rd: + example of 'ellipsoid' construction 2006-01-31 Martin Maechler * man/bannerplot.Rd: (et al.): use \dQuote{} and \sQuote{} 2006-01-27 Martin Maechler * man/pltree.twins.Rd: explain the as.hclust() dispatch; add example, particularly for plot(as.dendrogram(.)). 2006-01-26 Martin Maechler * DESCRIPTION (Version): 1.10.4 -- released to CRAN 2006-01-25 Martin Maechler * R/silhouette.R (silhouette.clara): added (for "full = TRUE" option) 2006-01-07 Martin Maechler * DESCRIPTION (Version): 1.10.3 * src/init.c: registering all (R-callable) C and Fortran routines * src/cluster.h: declare the remaining callable routines 2005-12-30 Martin Maechler * inst/CITATION: first version * R/silhouette.R (plot.silhouette): 'nmax.lab' was not obeyed properly (labels were drawn for large n when the silhouette had rownames). 
* man/silhouette.Rd: explain clara's silhouette and "full" one. 2005-09-06 Martin Maechler * man/pluton.Rd: \source: new URL @ www.agoras.ua.ac.be * man/clusplot.default.Rd: dito * man/ellipsoidhull.Rd: "" * README: "" 2005-08-31 Martin Maechler * DESCRIPTION (Version): 1.10.2 released to CRAN * src/clara.c (bswap2): minor cosmetic 2005-08-30 Martin Maechler * R/daisy.q (daisy): the case of a *binary*-only matrix (not data frame), used to fail, now works fine, tested in * tests/daisy-ex.R: * src/twins.f: get rid of gfortran warnings * src/mona.f: dito * src/fanny.f: " * src/meet.f: " 2005-08-06 Martin Maechler * tests/clara*: adapt to slightly changed output * src/clara.c (clara), (bswap2): better trace_lev printing; code more in line with 'pam'. 2005-08-05 Martin Maechler * DESCRIPTION (Version): 1.10.2 {not released} * R/clara.q (clara): no longer support deprecated 'keepdata'; also return 'i.med'; new argument 'medoids.x' allowing to save even more memory; do not assign 'data' when keep.data=FALSE. * man/clara.Rd: + 'i.med' + 'medoids.x' - 'keepdata' * man/clara.object.Rd: dito 2005-07-19 Martin Maechler * tests/silhouette-default.R: if(FALSE) library(*, lib="..MM..") 2005-06-30 Martin Maechler * tests/silhouette-default.R (isTRUE): need for R < 2.1.x 2005-06-29 Martin Maechler * DESCRIPTION (Date): * DESCRIPTION (Version): 1.10.1 * R/silhouette.R (plot.silhouette): format() clus.avg.widths more nicely * R/silhouette.R (silhouette.default): don't use max.col() which returns random. * tests/silhouette-default.R: test only 3rd column difference * tests/silhouette-default.Rout: update {for non-random sil*.default()} 2005-06-20 Martin Maechler * tests/silhouette-default.R: added test for equivalence of silhouette.default() and silhouette.partition() * tests/silhouette-default.Rout.save: new output --> The silhouette() results are *still* not always the same (-> fixed) * R/silhouette.R (silhouette.default): if Nj := #{obs. 
in C_j} = 1 the silhouette width is 0, not 1, as in pam's original code. * man/silhouette.Rd: document the case of 1-obs-clusters. 2005-06-09 Martin Maechler * DESCRIPTION (Version): 1.10.0 2005-06-08 Martin Maechler * R/pam.q (pam): new argument 'trace.lev = 0' * man/pam.object.Rd: pam() also return $medID * src/pam.c (dark): take special care of empty clusters (ntt == 0) * src/pam.c (pam): also work when medoids are not increasing. * R/pam.q (pam): user-supplied 'medoids': even more checks 2005-06-02 Martin Maechler * tests/fanny-ex.R: use new RNG (hence change example); add samples of memb.exp -> 1 * DESCRIPTION (Author): new formulation for better auto-citation() * src/fanny.f (fanny): 'r' ("membership exponent") becomes user-specifiable argument; dito for 'maxit' * R/fanny.q (fanny): new arguments 'memb.exp' ("membership exponent"); 'tol' and 'maxit' * R/fanny.q (.print.fanny): utility to be used in both print.*() methods; nicer, using formatC() and 'digits' explicitly. * man/fanny.Rd: new arguments, see above * man/fanny.object.Rd: mention new '$ objective' result component * man/print.fanny.Rd: now also for [print.]summary.fanny(); mention new explicit 'digits' argument to print methods. 2005-04-04 Martin Maechler * DESCRIPTION (Version): 1.9.8 * R/daisy.q (print.dissimilarity): rewritten, now based on print.hclust(). * R/zzz.R (gettextf): ..etc: define substitutes such as to allow running in R < 2.1.0 * man/ellipsoidhull.Rd: fixed outdated "link[pkg]" * man/clusplot.default.Rd: dito * man/*.Rd : etc 2005-03-22 Martin Maechler * R/clara.q (clara): start to 'rationalize' messages for future translation. * src/cluster.h: define the _(String) macro (for i18n), not yet used. 
2005-01-21 Martin Maechler * man/clara.Rd: get rid of iso-latin1 character 2004-12-14 Martin Maechler * man/agnes.Rd: concept{UPGMA ..} 2004-11-27 Martin Maechler * DESCRIPTION (Version): 1.9.7 (not yet CRAN) * R/plothier.q (bannerplot): ouse default 'main' and 'sub' * man/bannerplot.Rd: + example * R/coef.R (coef.hclust): Implement R function for computing the agglomerative/divisive coefficient. * man/coef.hclust.Rd: new; including example * man/predict.ellipsoid.Rd: wrong link 2004-11-26 Martin Maechler * src/twins.f (bncoef): drop unused argument 'ner' 2004-11-20 Martin Maechler * man/clusplot.default.Rd: fix typo 2004-10-28 Martin Maechler * man/predict.ellipsoid.Rd: add nice example 2004-08-19 Martin Maechler * DESCRIPTION (LazyData): yes 2004-08-12 Martin Maechler * DESCRIPTION (Version): 1.9.6 (Depends): added standard packages * R/daisy.q (daisy): scale=TRUE didn't collaborate with NAs (since 2003-11-29). * tests/daisy-ex.R: add an example, testing the above 2004-08-03 Martin Maechler * DESCRIPTION (Version): 1.9.5 --> to CRAN 2004-07-21 Martin Maechler * man/mona.Rd: drop an extraneous "}\n" that Kurt's new tools found * man/twins.object.Rd: dito 2004-07-06 Martin Maechler * DESCRIPTION (Version): 1.9.5 (not yet released) * man/pam.Rd: note on 'medoids' 2004-07-02 Martin Maechler * R/pam.q: new argument 'medoids' * man/pam.Rd: ex.with 'medoids' * src/pam.c (bswap): new argument 'med_given' 2004-06-29 Martin Maechler * src/pam.c (bswap): moved ammax/nmax computation into same loop as beter[]. added many comments. GOAL: Allow initial medoids user-specification. >>> TODO: do the analogous thing in src/clara.c 's bswap2() 2004-06-24 Martin Maechler * DESCRIPTION (Version): 1.9.4 -- fix dysta() using jtmd[] problems (thanks to valgrind and BDR). 
* R/agnes.q (agnes): initialize 'jtmd' to length p in any case * R/diana.q (diana): ditto * R/fanny.q (fanny): " * R/pam.q (pam): " * R/daisy.q (daisy): pass 'mdata' to Fortran * src/daisy.f (daisy): new "boolean" argument 'mdata' * src/clara.c (clara): pass has_NA to dysta2() and don't use jtmd[] and valmd[] if(!has_NA) * src/cluster.h (dysta2): new arg 'has_NA' 2004-06-18 Martin Maechler * DESCRIPTION (Version): 1.9.3 {only change = below} * src/ind_2.h (ind_2): use __inline__ only for gcc (__GNUC__) 2004-06-11 Martin Maechler * man/clara.Rd: finish \note{} about 'rngR' (thanks to Christian H). 2004-06-01 Martin Maechler * src/ind_2.h: new file for __inline__ definition of ind_2(), #included from these two files: * src/clara.c: * src/pam.c: * tests/agnes-ex.R: test for "weighted" == "flexible"(0.5) 2004-05-28 Martin Maechler * DESCRIPTION (Version): 1.9.2 (not yet released) * R/agnes.q (agnes): New method = "flexible" (and 'par.method = ..') * src/twins.f (averl): implementing "flexible" Lance-Williams formula. * man/agnes.Rd: 2004-05-25 Martin Maechler * R/silhouette.R (silhouette.default): give silhouette 0 (not NaN) when a_i = b_i {thanks to example and suggestions by Witek Wolsky}. 2004-04-26 Martin Maechler * src/pam.c: cosmetic efficiency improvement from Li Long 2004-03-11 Martin Maechler * DESCRIPTION (Version): 1.8.1 for R versions <= 1.8.1 1.9.1 for R versions >= 1.9.0 -- released to CRAN * src/clara.c (clara): fixed second NA-handling bug: the test should be "nunfs+1 != jran" Fixed "rngR = TRUE" bug: accidentally still used internal RNG. Fixed 3rd bug if (lrg_sam) : need jk < n_sam for nsel[jk]. More & nicer printing for 'trace > 0'. 
* R/clara.q (clara): more useful error msg for too many NAs (jstop=1) 2004-03-10 Martin Maechler * src/clara.c (clara): finally fixed longstanding bug which could cause wrong results and segmentation faults, instead of ending with the following error: * tests/clara-NAs.R: new file w/ former seg.fault * src/cluster.h (ind_2): new function instead of F77_CALL(meet) * src/clara.c (clara): defined here (and used). * src/pam.c (pam): used here 2004-03-09 Martin Maechler * R/clara.q (clara): sampsize = min(n, ) 2004-03-08 Martin Maechler * tests/agnes-ex.R: do not use object.size(), since * tests/diana-ex.R: it's different on 64 bit, e.g. * R/agnes.q (agnes): \ when diss = TRUE, * R/diana.q (diana): \ but 'x' is not a "dissimilarity", * R/fanny.q (fanny): / try as.dist(x) * R/pam.q (pam): / and be consistent in all 4 functions 2004-01-23 Martin Maechler * R/silhouette.R (silhouette.default): do not require cluster codes in 1:k (Murad Nayal ). * man/silhouette.Rd: accordingly; mention 2 <= k <= n-1 2003-12-22 Martin Maechler * DESCRIPTION (Version): 1.8.0 for R versions <= 1.8.1 1.9.0 for R versions >= 1.9.0 2003-12-19 Martin Maechler * NAMESPACE: Finally make a namespaced package 2003-12-01 Martin Maechler * R/pam.q (print.summary.pam): readable indication *no* L/L* clusters. (print.pam): improve as well. * man/pam.Rd: documenting `cluster.only' 2003-11-29 Martin Maechler * R/daisy.q (daisy): better checking & handling if(stand) * R/pam.q (pam): new argument `cluster.only = FALSE'. If true, only return the clustering vector, no other info. * src/pam.c: translated from src/pam.f and "cleaned"; `all_stats' := NOT(cluster_only). 
* src/cluster.h: new pam.c related headers * src/dysta.f (dysta): new file: part of former src/pam.f * R/pam.q (pam): use .C 2003-11-28 Martin Maechler * R/daisy.q (daisy): fix the new scale 0 code (from 11-17) 2003-11-17 Martin Maechler * DESCRIPTION (Version): 1.7.7 -- (not released) * tests/daisy-ex.R: new test for this: * R/daisy.q (daisy): scale 0 does not give NaN dissimilarities anymore. Use pColl() in messages * R/zzz.R (sQuote): & dQuote() for R version < 1.8 2003-10-25 Martin Maechler * man/fanny.Rd: typo in the text-part of \deqn{} 2003-09-24 Martin Maechler * DESCRIPTION (Date): Sept.24: 20th anniversary w/ Lilo! -- release * man/plot.mona.Rd: yet another codoc 2003-09-23 Martin Maechler * DESCRIPTION (Version): 1.7.6 (for R 1.8.0) * man/daisy.Rd: codoc difference for argument *defaults* * man/ellipsoidhull.Rd: " " " " * man/plot.mona.Rd: " " " " * man/plot.diana.Rd: " " " " use \method{..}{..} (more!) also for these: * man/predict.ellipsoid.Rd * man/silhouette.Rd: * man/plot.agnes.Rd: * man/plot.partition.Rd: * man/ellipsoidhull.Rd: 2003-09-03 Martin Maechler * DESCRIPTION (Date): -> release 2003-08-27 Martin Maechler * man/flower.Rd: use V1-V8 in doc (since can't change the data). 2003-08-19 Martin Maechler * DESCRIPTION (Version): 1.7.5 -- not yet released 2003-08-13 Martin Maechler __ All these are thanks to Luke Tierney's checkUsagePackage() !! __ * R/silhouette.R (plot.silhouette): `clw' (sortSilhouette): `clid' & `k' (summary.silhouette): `n' * R/ellipsoidhull.R (ellipsoidhull): `ina' unneeded * R/plotpart.q (clusplot.default): extraneous if() in "funny case" 2003-07-18 Martin Maechler * DESCRIPTION (Date): updated --> release to CRAN * R/daisy.q (daisy): checking "> 2 levels" for binary vars (gave wrong error when only 1 level; thanks to Duncan.Mackay@flinders.edu.au). 
Now allow "only 1 level" for binary vars 2003-07-10 Martin Maechler * DESCRIPTION (Date): update; not released, but put up on FTP * R/silhouette.R (sortSilhouette): keep ordering; use it to * R/silhouette.R (plot.silhouette): order colors, allow cluster colors 2003-07-09 Martin Maechler * DESCRIPTION (Version): 1.7.4 -- not yet released * R/daisy.q (daisy): better error message for invalid type components; now also works for * man/daisy.Rd: new example `dfl3' 2003-06-10 Martin Maechler * DESCRIPTION (Version): 1.7.3 * R/silhouette.R (silhouette.default): fix bugs for case "Nj == 1" * tests/silhouette-default.Rout.save: * tests/silhouette-default.R: new test for the above 2003-06-04 Martin Maechler * tests/clara-ex.R and man/clara.Rd: add "drop = FALSE" 2003-06-03 Martin Maechler * man/clara.Rd: the "simulation" example is now correct for any seed. * tests/clara-ex.R: using "correct" ex. above 2003-06-02 Martin Maechler * R/daisy.q (daisy): s/codes/unclass/ * tests/ellipsoid-ex.R: no need for try() anymore; use new RNG * tests/clara.R: * tests/clara-ex.R: better if(...) RNGdefault("1.6") * tests/fanny-ex.R: * tests/mona.R: 2003-05-28 Martin Maechler * DESCRIPTION (Version): 1.7.2 * R/plotpart.q: clusplot.partition(): try to find `x$diss' by looking up x$call. 2003-04-30 Martin Maechler * DESCRIPTION (Version): 1.7.1 * man/pam.object.Rd: add example about assessing #{clusters} via silhouette widths from Ch.Hennig. * R/plotpart.q (plot.partition): new argument `dist' It doesn't try a clusplot if `keep.diss' was FALSE and no `dist' is specified. * R/plotpart.q (clusplot.partition): new `dist' argument * R/pam.q (pam): keep.diss passed wrong `jdyss' to Fortran! 2003-04-08 Martin Maechler * R/plothier.q (pltree.twins): simplified label construction; call plot( ) instead of plot.hclust(). 
2003-03-26 Martin Maechler * tests/clara-ex.R: new, because of unclear non-reproducibility * tests/clara.R: not example(clara) and smaller, moved some to above * DESCRIPTION (Version): 1.7.0 (for R 1.7.0) * DESCRIPTION (Depends): at least R 1.4 * R/zzz.R: requiring 1.4 needs much less * R/zzz.R: define colSums() substitute for old R versions * tests/ : updated *.Rout.save files * .Rbuildignore: R/README-Splus is way outdated and irrelevant 2003-03-17 Martin Maechler * R/0aaa.R: Make sure assigning class c("dissimilarity", "dist") (had many cases where only "dissim.." was used!). * R/*.q: assign (dissim.., dist) class 2003-03-11 Martin Maechler * R/clara.q (clara): new argument `rngR' allowing to use R's RNG instead of the primitive builtin randm(). * man/clara.Rd: example showing its use. * src/clara.c (clara): * R/pam.q (pam): new `keep.data' and DUP=FALSE in .Fortran 2003-03-05 Martin Maechler * R/agnes.q (agnes): use DUP = FALSE * R/diana.q (diana): for both "twins()" routines --> 3x less memory! 2003-02-22 Martin Maechler * R/clara.q (clara): do *not* transpose the x[,] matrix anymore * src/clara.c (clara): C code accesses un-transposed x[,] \ --> + 10% speed 2003-02-07 Martin Maechler * R/silhouette.R (silhouette.default): fixed k=2 case; * man/silhouette.Rd: new argument `dmatrix'. 2003-02-06 Martin Maechler * man/plot.partition.Rd: new `main' argument (instead of "...") * R/plotpart.q: passed to plot.silhouette(). 2003-01-27 Martin Maechler * R/agnes.q (agnes): store `method' with object for as.hclust.twins()! \ new logical args `keep.diss' and `keep.data' * R/diana.q (diana): > if (!keep.diss), do not need "dis2" * src/twins.f (twins):/ and don't copy the dist()s here. * man/pltree.twins.Rd: mention new `ylab' argument. * R/plothier.q: __unfinished__ : xlab/ylab depend on "which.plot"! 2002-12-30 Martin Maechler * man/bannerplot.Rd: and * R/plothier.q (bannerplot): new `labels' e.g. passed from plot.agnes(). 
* man/plot.agnes.Rd: finally added `labels = ' example * tests/clara.R: for() loops were silly; + other ex + comments * src/clara.c: cosmetic * src/fanny.f: and * src/pam.f: cosmetic "syncing" of almost identical parts 2002-12-28 Martin Maechler * tests/clara.Rout.save: * tests/clara.R: add ruspini examples * src/clara.c: finally found & fixed "clara(ruspini, 4)" problem 2002-12-21 Martin Maechler * DESCRIPTION (Version): 1.6-4 {for R 1.6.2} * R/diana.q (diana): since 1.6-1, integer x was not coerced * tests/agnes-ex.R, * tests/diana-ex.R, and *.Rout.save: new test files * src/twins.f: some comments added; gotos removed banag() and bandy() were identical --> renamed to bncoef() 2002-12-17 Martin Maechler * R/agnes.q: agnes(*, method = "no") no longer segfaults * R/silhouette.R (plot.silhouette): adj=1.04 (not "1.05") * R/plothier.q (pltree.twins): new `labels' arg. such that it can be given to plot.agnes() e.g. 2002-12-05 Martin Maechler * DESCRIPTION (Date, Version): 1.6-3 * src/cluster.h: F77_NAME(meet) * src/clara.c: F77_CALL(meet) 2002-10-28 Martin Maechler * src/pam.f (pam): comments, (bswap): variable renaming 2002-10-26 Martin Maechler * DESCRIPTION (Version): 1.6-003 := pre-1.6-3 2002-10-23 Martin Maechler * R/agnes.q (print.summary.agnes): oops: had `clara' there! 2002-10-23 Martin Maechler * Version 1.6-2 ==> released to CRAN 2002-10-21 Martin Maechler * R/daisy.q (daisy): allow "+- Inf" in `x' (gave error from Fortran) 2002-10-19 Martin Maechler * R/silhouette.R (plot.silhouette): also use border = 0 and new default col = "gray". * R/plothier.q (bannerplot): arguments inside = . , border = 0 work with R 1.6.1's barplot() {give "not yet .." otherwise}. * tests/mona.R and mona.Rout.save: new tests * R/mona.q (mona): get rid of 1:nchar() warnings; other cosmetic * src/mona.f (mona): variable renaming; logical cleanup in NA dealing * R/zzz.R (stop): multi-argument version for R versions < 1.6 2002-10-18 Martin Maechler * R CMD check now okay. 
* man/plot.mona.Rd: update (for bannerplot() usage), same for * man/plot.agnes.Rd and * man/plot.diana.Rd. * R/plothier.q (bannerplot): finally working for all three; argument `xax.pretty = TRUE' (set to FALSE for old behavior). (plot.mona): using bannerplot(); {and is.na(.) <- .} 2002-10-16 Martin Maechler * R/plothier.q (bannerplot): newly standalone function, called from plot.diana(), plot.agnes() -- not yet plot.mona() 2002-10-15 Martin Maechler * DESCRIPTION (Version): 1.6-2 (not yet released) * R/plothier.q (plot.diana, bannerplot): rev(.) x-axis labels --> fixing bug introduced at/before version 1.2-5 * R/plothier.q (plot.agnes, bannerplot) and plot.diana: add "las = 2" to vertical axis(), and add space to right for plot.diana. 2002-09-12 Martin Maechler * DESCRIPTION (Date): -- released locally only 2002-09-11 Martin Maechler * R/clara.q (clara): for the case of k = 1 (and diss=FALSE), * R/pam.q (pam): medoids should stay matrix * src/pam.f (pam): put under RCS; replaced goto by if..else.. * man/pam.Rd: mention silhouette() directly * man/silhouette.Rd: document summary(); + 1ex 2002-09-09 Martin Maechler * DESCRIPTION (Version): 1.6-1 ==> for CRAN and R 1.6.0 2002-09-07 Martin Maechler * R/silhouette.R (silhouette): New class, generic generator, methods; particularly, plot.silhouette() was plot.partition internal. improve documentation about silhouette info; update * R/clara.q: * R/plotpart.q (plot.partition): 2002-09-06 Martin Maechler * man/partition.object.Rd, * man/pam.object.Rd, man/clara.object.Rd, man/fanny.object.Rd, * man/plot.partition.Rd: reorganize documentation on silhouette information, thanks to thoughts from Christian Hennig. 2002-09-04 Martin Maechler * man/agnes.object.Rd: mention as.hclust() properly * src/spannel.c (spannel): bail out also when *last* deter became <= 0 * tests/ellipsoid-ex.R (and *.Rout.save): updated * DESCRIPTION (Version): 1.6-0 * R/zzz.R: dev.interactive(), warnings(),... 
for old R versions * man/summary.clara.Rd: make example run for R versions < 1.5.0 2002-09-03 Martin Maechler * R/plotpart.q (plot.partition): new `data' argument and smart `which.plots' default for clara(*, keep = FALSE) * R/daisy.q (summary.dissimilarity): fix ``number of dissim'' * R/*.q: 1) use apply(inax,2, any)--- really want colNAs() 2) use better "valmisdat" (missing value). * tests/dysta-ex.R (dysta): new file, explore "dysta" & "dysta3" in Fortran * src/fanny.f: get rid of many gotos 2002-09-02 Martin Maechler * tests/fanny-ex.R: and ...Rout.save : new files * src/twins.f (twins): drop dysta4(), use dysta() from pam.f 2002-08-31 Martin Maechler * DESCRIPTION (Version): 1.5-4 -- have achieved something {want more} * src/clara.c (clara): finally found and fixed the rare segfault bug: jran == 1 was wrong test (when 1st sample was unusable) (randm): faster implementation of primitive RNG * src/spannel.c: new translated from src/spannel.f in order to debug the compiler-dependent outputs in * tests/ellipsoid-ex.Rout.save etc * tests/sweep-ex.R (.C) 2002-08-30 Martin Maechler * src/clara.c (clara): * man/clara.object.Rd: * R/clara.q: new argument `keepdata' and `trace' * R/plotpart.q (clusplot.default): xlim, ylim: believe user 2002-08-27 Martin Maechler * src/clara.c (clara): new file, translated from Fortran; easier to debug the (rare!) seg.fault. 
"make check fine" * DESCRIPTION (Depends): R >= 1.2 * R/plothier.q (pltree.twins): (drop code for R version < 1.2.0) 2002-08-23 Martin Maechler * DESCRIPTION (Version): 1.5-3 {released only locally} * R/plotpart.q (clusplot.default): xlim, ylim had "<" and ">" reversed * man/clusplot.default.Rd: fix interactive(), add x- and ylim example 2002-08-02 Martin Maechler * src/twins.f (twins) et al: * src/fanny.f (fanny) et al: * src/mona.f (mona): * src/daisy.f (daisy): explicit instead of "implicit" var.declaration * src/spannel.f (spannel) et al: no "implicit none" 2002-07-29 Martin Maechler * DESCRIPTION (Version): 1.5-2 * R/daisy.q (print.summary.dissimilarity): new function and summary.dissimilarity() now returns a classed object. * R/agnes.q (print.summary.agnes), * R/clara.q (print.summary.clara), * R/diana.q (print.summary.diana), * R/fanny.q (print.summary.fanny), * R/pam.q (print.summary.pam): print.summary.*() now only *summarizes* dissimilarities (if at all) 2002-07-27 Martin Maechler * tests/pam.R and tests/pam.Rout.save : new files * man/summary.agnes.Rd: + example * man/summary.clara.Rd: + example * R/clara.q (print.clara): improvements: call, no long clus. vector * R/agnes.q (print.agnes): similar * man/daisy.Rd : added "[mva]" to \link{}s. The same in: * man/clusplot.default.Rd: new `col.clus' argument, new option `labels = 5'. * R/plotpart.q (clusplot.default): cosmetical cleaning; `col.p' is now vectorized for point coloring. The cluster ellipse is now labeled with font=4 and proper color 2002-06-17 Martin Maechler * man/sizeDiss.Rd: fix \value{} * tests/daisy-ex.R: new file (and *.Rout.save) 2002-06-17 Martin Maechler * submit to CRAN -- won't be in R's 1.5.1-recommended * R/daisy.q (daisy): make sure "0","1" factors are valid binary vars several extra checks {incl apply() `bug' * man/diana.object.Rd: show how to use cutree(). 2002-05-22 Martin Maechler * R/daisy.q (daisy): warn if binary variables have non-{0,1} values. 
* src/pam.f (cstat) et al: eliminated many gotos; +comments * src/meet.f (meet): + comment 2002-05-21 Martin Maechler * DESCRIPTION (Version): 1.5-1 new daisy() behavior for binary variables * src/daisy.f (daisy): add comments; use if..else.. instead of goto * man/dissimilarity.object.Rd: new "Types" attribute in mixed case. * man/daisy.Rd: * R/daisy.q (daisy): fix data.class "integer", allow type = "symm"; return types used in mixed case; correctly modify jtmd[] for binary variables (!) 2002-03-30 Martin Maechler * R/plotpart.q: replace `` == "NA" '' by is.na(.) * R/mona.q (mona): 2002-03-04 Martin Maechler * DESCRIPTION (Version): 1.4-1 * R/zzz.R (.First.lib), * R/plothier.q: replace plclust() by plot[.hclust]() everywhere. 2002-01-29 Martin Maechler * R/pam.q (pam): comment on "valmisdat"; same in * R/fanny.q, R/agnes.q, R/clara.q, R/diana.q * src/pam.f (dysta): comment + white space * src/fanny.f (fanny): lowercase and indent + comments 2002-01-24 Martin Maechler * man/agnes.Rd, diana.Rd, pam.Rd, clara.Rd, mona.Rd, fanny.Rd : Reference and BACKGROUND section only in agnes.Rd; the others refer to agnes. * man/fanny.Rd: clean * R/agnes.q (agnes): \ ``diss = inherits(x, "dist")'' * R/diana.q (diana): > instead of "diss = FALSE" * R/fanny.q (fanny): / as we have changed pam() already in 1.4-0 2002-01-23 Martin Maechler * DESCRIPTION (Version): 1.4-0 * man/ellipsoidhull.Rd: example * tests/ellipsoid-ex.R and *.Rout: finalized * man/pluton.Rd: work around Rdconv \eqn{.}{.} bug. 2002-01-22 Martin Maechler * R/ellipsehull.R (ellipsehull) et al: generalized from 2 to p dimensions. -- points generation: not yet! * tests/ellipsehull.R: new test file * man/clusplot.partition.Rd: clean up * man/clusplot.default.Rd: proper reference to Pison et al * man/clusplot.Rd: clean 2002-01-21 Martin Maechler * R/ellipsehull.R (ellipsehull) and others: new functions * R/plotpart.q (clusplot.default) use new ellipsePoints(); simplification by using "d2" (= dist^2) instead of "dist". 
2002-01-19 Martin Maechler * R/plotpart.q (clusplot.default) re-scale cov.wt() result: Finally return the smallest possible ellipses. NOTA BENE ===> (numerical) results are *different* ! 2002-01-18 Martin Maechler * R/plotpart.q (clusplot.default) {spannel}: Finally found why our ellipses are not quite ok : R's cov.wt() differs from S-plus' ! * src/spannel.f (spannel): new argument `maxit' (was 5000). * R/plotpart.q (cusplot.default): cleanup, eliminating internal kleur() & plotje(); and "spannel" arguments; new maxit; lower eps use which.min() and which.max(); ... * R/pam.q (pam): diss has new default = inherits(x, "dist") which is TRUE therefore for dissimilarity or dist objects. 2002-01-12 Martin Maechler | 2002-01-18 Martin Maechler * R/agnes.q, R/diana.q : a bit of cleanup in the two twins calling functions. * man/lower.to.upper.tri.inds.Rd, * man/cluster-internal.Rd: new for formerly local functions, now in * R/internal.R (sizeDiss), * R/internal.R (lower.to.upper.tri.inds), and upper.to...: new functions instead of local functions in several places, e.g., * R/diana.q, R/fanny.q, ... * R/plotpart.q (clusplot.default): fix bug PR#1249: cmd() != cmdscale(); use new cmdscale(*, add=TRUE) ---> (slightly) different result sometimes fix long-standing typo in NA case + more cleanup * R/plotpart.q (clusplot.partition): explicit `main' argument with better default. 2001-12-06 Martin Maechler * DESCRIPTION (Version): 1.3-6 * R/plotpart.q: enable `density =' for polygon shading. 2001-11-27 Martin Maechler * R/zzz.R: get rid of .Alias 2001-11-06 Martin Maechler * DESCRIPTION (Version): 1.3-5 * R/plothier.q: Fix menu() bug thanks to Richard Rowe. * R/plotpart.q: ditto * R/agnes.q: properly allow integer matrix input: don't use all(sapply(x, data.class) == "numeric") anymore. * R/clara.q, R/diana.q, R/fanny.q, R/pam.q: ditto 2001-11-05 Martin Maechler * R/pam.q: `call' via match.call() instead of sys.call, and as list component instead of attribute. 
[R philosophy compatibility] * R/mona.q: ditto * R/fanny.q, R/diana.q, R/clara.q, R/agnes.q, * R/plothier.q, R/plotpart.q: ditto 2001-10-09 Martin Maechler * DESCRIPTION (Version): 1.3-5b (beta) for sending to Robert G * R/plothier.q: plot.diana() must have main=NULL * R/diana.q: minor code cleanup 2001-08-27 Martin Maechler * README.orig: renamed from R/README-splus 2001-08-22 Martin Maechler * DESCRIPTION (Version): New version is 1.3-4 * man/flower.Rd: nicer using \describe{} * man/plot.partition.Rd (and R/plotpart.q): new argument `which.plots' (as in the other plot.* functions). * R/plothier.q: All plot.* methods which produce more than one plot now call par(ask = TRUE) automagically when `needed' (as e.g., in plot.lm()). * man/*.Rd: document all arguments; a bit more cleanup. R (1.4.0) CMD check is now okay. 2001-08-18 Martin Maechler * R/*.q and man/*.Rd: generic/method argument fixes 2001-05-26 Martin Maechler * man/*.Rd: indenting in all dataset examples * man/votes.repub.Rd: usage fix 2001-05-23 Martin Maechler * INDEX: structured logically, rather than alphabetically * tests/clara.R: new test * src/clara.f (clara): added comments * R/clara.q (clara) : comments and cleanup 2001-05-22 Martin Maechler * DESCRIPTION (Version): New version is 1.3-3. * R/agnes.q and all others: `components' not `arguments' in print.*() * src/meet.f (meet): use [if then else] instead of goto * src/clara.f (clara): all declarations explicit; some cleanup 2001-05-21 Martin Maechler * DESCRIPTION (Package): licence changed to GPL (Rousseeuw's e-mail) * R/pam.q: minor code cleanup for Fortran interface * src/pam.f (pam): all declarations explicit * README: integrated former ./README_MM * src/twins.f, many R/*.q and * man/pltree.Rd: replace s/S-plus/S/ in many places 2001-03-21 Martin Maechler * man/print.summary.FOO.Rd: drop these files, move some to FOO.Rd * man/print*.Rd: cleanup, use \method{} 2001-01-04 Martin Maechler * DESCRIPTION (Version): New version is 1.3-2. 
* man/print*.Rd: Better \title{.}, hence * INDEX * man/*.Rd: Remove \keyword{libcluster}; we have {cluster}. 2001-01-03 Martin Maechler * DESCRIPTION (Version): New version is 1.3-1. 2001-01-02 Martin Maechler * man/*.Rd: fixes for codoc() * src/spannel.f (spannel): improve readability, indent properly, add a few comments * src/clara.f: * src/pam.f: * src/twins.f: * R/*.q : Added PACKAGE = .. to all .Fortran() calls ===== Many codoc() fixes; particularly summary.*(*, ...) * R/plotpart.q: (clusplot.partition): simplified * R/agnes.q: T/F -> TRUE/FALSE and more * R/clara.q: * R/diana.q: * R/fanny.q: * R/mona.q: * R/pam.q: 2000-12-30 Martin Maechler * DESCRIPTION (Version): New version is 1.2-5. 2000-12-24 Kurt Hornik * DESCRIPTION (Version): New version is 1.2-4. (Maintainer): New entry. 2000-12-14 Martin Maechler * src/daisy.f: indented do loops; one if / else. * R/daisy.q: __ daisy() __ - "ordratio" |-> "T", was "O" erronously! - `metric' and `list' argument checking * man/clusplot.default.Rd: updated and cleaned. 2000-12-02 Martin Maechler * R/plothier.q: plot.agnes() & plot.diana() : main=NULL defaults to two different titles for both plots 2000-11-30 Martin Maechler * man/...Rd: - \section{NOTE} becomes \note - fix most T/F to TRUE/FALSE, .. * R/plothier.q: - cleanup (T/F); indenting - plot.mona(): las = 1 for axis; allow main= - plot.diana(): `which.plot' and main= and sub= - plot.agnes(): `which.plot' and main= and sub= - pltree.twins(): allow main= ; rm"plot = TRUE" (warn) --> now depends on R 1.2's plot.hclust() * R/plotpart.q: clusplot.default() -- now works! - *much* clean up - color choice such as to see points - got rid of NaN warning - eliminated "polygon(*,density.) warnings by '##no yet'" 2000-11-29 Martin Maechler * R/daisy.q: add "dist" class (and fix T/F to TRUE/FALSE etc) * R/daisy.q and * man/print.dissimilarity.Rd: add summary.dissimilarity() * man/dissimilarity.object.Rd: cleanup, use \value{.}, doc. 
"dist" * man/daisy.Rd: cleanup, use \value{.} * man/agnes.Rd: cleanup. * man/*.object.Rd: cleanup, use \value{.} Thu Feb 17 22:56:58 2000 Kurt Hornik * DESCRIPTION (Version): New version is 1.2-3. * src/Makefile: Removed. Tue Dec 28 18:41:09 1999 Kurt Hornik * DESCRIPTION (Version): New version is 1.2-2. * data/00Index: Added entry for `xclara'. * man/xclara.Rd: New file. * data/figure2.R: * data/table4.R: Removed as unused and undocumented. Sun Dec 5 20:14:45 1999 Kurt Hornik * DESCRIPTION (Version): New version is 1.2-1. * R/daisy.q: * src/daisy.f: * PORTING: Rewrite to pass integers rather than character strings to Fortran (changes provided by BDR). Sun Apr 11 04:21:03 1999 Kurt Hornik * DESCRIPTION (Version): New version is 1.2-0. * R/plotpart.q: Replace rep.int() by rep(). * R/zzz.R: Make .First.lib() use plot.hclust() for plclust() which seems to do the job, sort of. * data/animals.R: Replaced by `animals.tab'. * data/ruspini.R: Replaced by `ruspini.tab'. * data/votes.repub.tab: New file. * man/agriculture.Rd: New file. * man/animals.Rd: New file. * man/flower.Rd: New file. * man/ruspini.Rd: New file. * man/votes.repub.Rd: New file. * man/*: Hand-edit all examples to make executable. Fri Nov 27 23:53:11 1998 Kurt Hornik * DESCRIPTION (Version): New version is 1.1-3. * R/mona.q: Barf only if neither a matrix nor a data frame (remember that in S, is.matrix() is TRUE for data frames). * man/*: Converted anew via `Sd2Rd -x' using Sd2Rd 0.3-2. * TODO: Removed. Tue Jun 16 09:23:15 1998 Kurt Hornik * DESCRIPTION (Version): New version is 1.1-2. * DESCRIPTION: * PORTING: * TITLE: * R/zzz.R: * src/Makefile: Change old `clus' to new name `cluster'. Mon Jun 15 11:01:52 1998 Kurt Hornik * ChangeLog: Finally started, current version is 1.1-1. 
cluster/man/0000755000175100001440000000000012553131037012524 5ustar hornikuserscluster/man/print.clara.Rd0000644000175100001440000000127407520565344015246 0ustar hornikusers\name{print.clara} \alias{print.clara} \title{Print Method for CLARA Objects} \description{ Prints the best sample, medoids, clustering vector and objective function of \code{clara} object. This is a method for the function \code{\link{print}()} for objects inheriting from class \code{\link{clara}}. } \usage{ \method{print}{clara}(x, \dots) } \arguments{ \item{x}{a clara object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{ \code{\link{summary.clara}} producing more output; \code{\link{clara}}, \code{\link{clara.object}}, \code{\link{print}}, \code{\link{print.default}}. } \keyword{cluster} \keyword{print} % Converted by Sd2Rd version 0.3-2. cluster/man/agnes.Rd0000644000175100001440000002762212422156450014122 0ustar hornikusers\name{agnes} \alias{agnes} \title{Agglomerative Nesting (Hierarchical Clustering)} \concept{UPGMA clustering} \description{ Computes agglomerative hierarchical clustering of the dataset. } \usage{ agnes(x, diss = inherits(x, "dist"), metric = "euclidean", stand = FALSE, method = "average", par.method, keep.diss = n < 100, keep.data = !diss, trace.lev = 0) } \arguments{ \item{x}{ data matrix or data frame, or dissimilarity matrix, depending on the value of the \code{diss} argument. In case of a matrix or data frame, each row corresponds to an observation, and each column corresponds to a variable. All variables must be numeric. Missing values (NAs) are allowed. In case of a dissimilarity matrix, \code{x} is typically the output of \code{\link{daisy}} or \code{\link{dist}}. Also a vector with length n*(n-1)/2 is allowed (where n is the number of observations), and will be interpreted in the same way as the output of the above-mentioned functions. Missing values (NAs) are not allowed. 
} \item{diss}{ logical flag: if TRUE (default for \code{dist} or \code{dissimilarity} objects), then \code{x} is assumed to be a dissimilarity matrix. If FALSE, then \code{x} is treated as a matrix of observations by variables. } \item{metric}{ character string specifying the metric to be used for calculating dissimilarities between observations. The currently available options are \code{"euclidean"} and \code{"manhattan"}. Euclidean distances are root sum-of-squares of differences, and manhattan distances are the sum of absolute differences. If \code{x} is already a dissimilarity matrix, then this argument will be ignored. } \item{stand}{ logical flag: if TRUE, then the measurements in \code{x} are standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation. If \code{x} is already a dissimilarity matrix, then this argument will be ignored. } \item{method}{ character string defining the clustering method. The six methods implemented are \code{"average"} ([unweighted pair-]group [arithMetic] average method, aka \sQuote{UPGMA}), \code{"single"} (single linkage), \code{"complete"} (complete linkage), \code{"ward"} (Ward's method), \code{"weighted"} (weighted average linkage, aka \sQuote{WPGMA}), its generalization \code{"flexible"} which uses (a constant version of) the Lance-Williams formula and the \code{par.method} argument, and \code{"gaverage"} a generalized \code{"average"} aka \dQuote{flexible UPGMA} method also using the Lance-Williams formula and \code{par.method}. The default is \code{"average"}. } \item{par.method}{ If \code{method} is \code{"flexible"} or \code{"gaverage"}, a numeric vector of length 1, 3, or 4, (with a default for \code{"gaverage"}), see in the details section. } \item{keep.diss, keep.data}{logicals indicating if the dissimilarities and/or input data \code{x} should be kept in the result. 
Setting these to \code{FALSE} can give much smaller results and hence even save memory allocation \emph{time}.} \item{trace.lev}{integer specifying a trace level for printing diagnostics during the algorithm. Default \code{0} does not print anything; higher values print increasingly more.} } \value{ an object of class \code{"agnes"} (which extends \code{"twins"}) representing the clustering. See \code{\link{agnes.object}} for details, and methods applicable. } \author{ Method \code{"gaverage"} has been contributed by Pierre Roudier, Landcare Research, New Zealand. } \details{ \code{agnes} is fully described in chapter 5 of Kaufman and Rousseeuw (1990). Compared to other agglomerative clustering methods such as \code{hclust}, \code{agnes} has the following features: (a) it yields the agglomerative coefficient (see \code{\link{agnes.object}}) which measures the amount of clustering structure found; and (b) apart from the usual tree it also provides the banner, a novel graphical display (see \code{\link{plot.agnes}}). The \code{agnes}-algorithm constructs a hierarchy of clusterings.\cr At first, each observation is a small cluster by itself. Clusters are merged until only one large cluster remains which contains all the observations. At each stage the two \emph{nearest} clusters are combined to form one larger cluster. For \code{method="average"}, the distance between two clusters is the average of the dissimilarities between the points in one cluster and the points in the other cluster. \cr In \code{method="single"}, we use the smallest dissimilarity between a point in the first cluster and a point in the second cluster (nearest neighbor method). \cr When \code{method="complete"}, we use the largest dissimilarity between a point in the first cluster and a point in the second cluster (furthest neighbor method). 
The \code{method = "flexible"} allows (and requires) more details: The Lance-Williams formula specifies how dissimilarities are computed when clusters are agglomerated (equation (32) in K&R(1990), p.237). If clusters \eqn{C_1} and \eqn{C_2} are agglomerated into a new cluster, the dissimilarity between their union and another cluster \eqn{Q} is given by \deqn{ D(C_1 \cup C_2, Q) = \alpha_1 * D(C_1, Q) + \alpha_2 * D(C_2, Q) + \beta * D(C_1,C_2) + \gamma * |D(C_1, Q) - D(C_2, Q)|, } where the four coefficients \eqn{(\alpha_1, \alpha_2, \beta, \gamma)} are specified by the vector \code{par.method}, either directly as vector of length 4, or (more conveniently) if \code{par.method} is of length 1, say \eqn{= \alpha}, \code{par.method} is extended to give the \dQuote{Flexible Strategy} (K&R(1990), p.236 f) with Lance-Williams coefficients \eqn{(\alpha_1 = \alpha_2 = \alpha, \beta = 1 - 2\alpha, \gamma=0)}.\cr Also, if \code{length(par.method) == 3}, \eqn{\gamma = 0} is set. \bold{Care} and expertise is probably needed when using \code{method = "flexible"} particularly for the case when \code{par.method} is specified of longer length than one. Since \pkg{cluster} version 2.0, choices leading to invalid \code{merge} structures now signal an error (from the C code already). The \emph{weighted average} (\code{method="weighted"}) is the same as \code{method="flexible", par.method = 0.5}. Further, \code{method= "single"} is equivalent to \code{method="flexible", par.method = c(.5,.5,0,-.5)}, and \code{method="complete"} is equivalent to \code{method="flexible", par.method = c(.5,.5,0,+.5)}. The \code{method = "gaverage"} is a generalization of \code{"average"}, aka \dQuote{flexible UPGMA} method, and is (a generalization of the approach) detailed in Belbin et al. (1992). 
As \code{"flexible"}, it uses the Lance-Williams formula above for dissimilarity updating, but with \eqn{\alpha_1} and \eqn{\alpha_2} not constant, but \emph{proportional} to the \emph{sizes} \eqn{n_1} and \eqn{n_2} of the clusters \eqn{C_1} and \eqn{C_2} respectively, i.e, \deqn{\alpha_j = \alpha'_j \frac{n_1}{n_1+n_2},}{% \alpha_j = \alpha'_j * n_1/(n_1 + n_2),} where \eqn{\alpha'_1}, \eqn{\alpha'_2} are determined from \code{par.method}, either directly as \eqn{(\alpha_1, \alpha_2, \beta, \gamma)} or \eqn{(\alpha_1, \alpha_2, \beta)} with \eqn{\gamma = 0}, or (less flexibly, but more conveniently) as follows: Belbin et al proposed \dQuote{flexible beta}, i.e. the user would only specify \eqn{\beta} (as \code{par.method}), sensibly in \deqn{-1 \leq \beta < 1,}{-1 \le \beta < 1,} and \eqn{\beta} determines \eqn{\alpha'_1} and \eqn{\alpha'_2} as \deqn{\alpha'_j = 1 - \beta,} and \eqn{\gamma = 0}. This \eqn{\beta} may be specified by \code{par.method} (as length 1 vector), and if \code{par.method} is not specified, a default value of -0.1 is used, as Belbin et al recommend taking a \eqn{\beta} value around -0.1 as a general agglomerative hierarchical clustering strategy. Note that \code{method = "gaverage", par.method = 0} (or \code{par.method = c(1,1,0,0)}) is equivalent to the \code{agnes()} default method \code{"average"}. } \section{BACKGROUND}{ Cluster analysis divides a dataset into groups (clusters) of observations that are similar to each other. \describe{ \item{Hierarchical methods}{like \code{agnes}, \code{\link{diana}}, and \code{\link{mona}} construct a hierarchy of clusterings, with the number of clusters ranging from one to the number of observations.} \item{Partitioning methods}{like \code{\link{pam}}, \code{\link{clara}}, and \code{\link{fanny}} require that the number of clusters be given by the user.} } } \references{ Kaufman, L. and Rousseeuw, P.J. (1990). (=: \dQuote{K&R(1990)}) \emph{Finding Groups in Data: An Introduction to Cluster Analysis}. 
Wiley, New York. Anja Struyf, Mia Hubert and Peter J. Rousseeuw (1996) Clustering in an Object-Oriented Environment. \emph{Journal of Statistical Software} \bold{1}. \url{http://www.jstatsoft.org/v01/i04} Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997). Integrating Robust Clustering Techniques in S-PLUS, \emph{Computational Statistics and Data Analysis}, \bold{26}, 17--37. Lance, G.N., and W.T. Williams (1966). A General Theory of Classifactory Sorting Strategies, I. Hierarchical Systems. \emph{Computer J.} \bold{9}, 373--380. Belbin, L., Faith, D.P. and Milligan, G.W. (1992). A Comparison of Two Approaches to Beta-Flexible Clustering. \emph{Multivariate Behavioral Research}, \bold{27}, 417--433. } \seealso{ \code{\link{agnes.object}}, \code{\link{daisy}}, \code{\link{diana}}, \code{\link{dist}}, \code{\link{hclust}}, \code{\link{plot.agnes}}, \code{\link{twins.object}}. } \examples{ data(votes.repub) agn1 <- agnes(votes.repub, metric = "manhattan", stand = TRUE) agn1 plot(agn1) op <- par(mfrow=c(2,2)) agn2 <- agnes(daisy(votes.repub), diss = TRUE, method = "complete") plot(agn2) ## alpha = 0.625 ==> beta = -1/4 is "recommended" by some agnS <- agnes(votes.repub, method = "flexible", par.meth = 0.625) plot(agnS) par(op) ## "show" equivalence of three "flexible" special cases d.vr <- daisy(votes.repub) a.wgt <- agnes(d.vr, method = "weighted") a.sing <- agnes(d.vr, method = "single") a.comp <- agnes(d.vr, method = "complete") iC <- -(6:7) # not using 'call' and 'method' for comparisons stopifnot( all.equal(a.wgt [iC], agnes(d.vr, method="flexible", par.method = 0.5)[iC]) , all.equal(a.sing[iC], agnes(d.vr, method="flex", par.method= c(.5,.5,0, -.5))[iC]), all.equal(a.comp[iC], agnes(d.vr, method="flex", par.method= c(.5,.5,0, +.5))[iC])) ## Exploring the dendrogram structure (d2 <- as.dendrogram(agn2)) # two main branches d2[[1]] # the first branch d2[[2]] # the 2nd one { 8 + 42 = 50 } d2[[1]][[1]]# first sub-branch of branch 1 .. 
and shorter form identical(d2[[c(1,1)]], d2[[1]][[1]]) ## a "textual picture" of the dendrogram : str(d2) data(agriculture) ## Plot similar to Figure 7 in ref \dontrun{plot(agnes(agriculture), ask = TRUE)} \dontshow{plot(agnes(agriculture))} data(animals) aa.a <- agnes(animals) # default method = "average" aa.ga <- agnes(animals, method = "gaverage") op <- par(mfcol=1:2, mgp=c(1.5, 0.6, 0), mar=c(.1+ c(4,3,2,1)), cex.main=0.8) plot(aa.a, which.plot = 2) plot(aa.ga, which.plot = 2) par(op) \dontshow{## equivalence stopifnot( ## below show ave == gave(0); here ave == gave(c(1,1,0,0)): all.equal(aa.a [iC], agnes(animals, method="gave", par.meth= c(1,1,0,0))[iC]), all.equal(aa.ga[iC], agnes(animals, method="gave", par.meth= -0.1)[iC]), all.equal(aa.ga[iC], agnes(animals, method="gav", par.m= c(1.1,1.1,-0.1,0))[iC])) } ## Show how "gaverage" is a "generalized average": aa.ga.0 <- agnes(animals, method = "gaverage", par.method = 0) stopifnot(all.equal(aa.ga.0[iC], aa.a[iC])) } \keyword{cluster} cluster/man/plot.partition.Rd0000644000175100001440000001110511674154360016006 0ustar hornikusers\name{plot.partition} \alias{plot.partition} \title{Plot of a Partition of the Data Set} \description{Creates plots for visualizing a \code{partition} object.} \usage{ \method{plot}{partition}(x, ask = FALSE, which.plots = NULL, nmax.lab = 40, max.strlen = 5, data = x$data, dist = NULL, stand = FALSE, lines = 2, shade = FALSE, color = FALSE, labels = 0, plotchar = TRUE, span = TRUE, xlim = NULL, ylim = NULL, main = NULL, \dots) } \arguments{ \item{x}{an object of class \code{"partition"}, typically created by the functions \code{\link{pam}}, \code{\link{clara}}, or \code{\link{fanny}}.} \item{ask}{logical; if true and \code{which.plots} is \code{NULL}, \code{plot.partition} operates in interactive mode, via \code{\link{menu}}.} \item{which.plots}{integer vector or NULL (default), the latter producing both plots. 
Otherwise, \code{which.plots} must contain integers of \code{1} for a \emph{clusplot} or \code{2} for \emph{silhouette}.} \item{nmax.lab}{integer indicating the number of labels which is considered too large for single-name labeling the silhouette plot.} \item{max.strlen}{positive integer giving the length to which strings are truncated in silhouette plot labeling.} \item{data}{numeric matrix with the scaled data; per default taken from the partition object \code{x}, but can be specified explicitly.} \item{dist}{when \code{x} does not have a \code{diss} component as for \code{\link{pam}(*, keep.diss=FALSE)}, \code{dist} must be the dissimilarity if a clusplot is desired.} \item{stand,lines,shade,color,labels,plotchar,span,xlim,ylim,main, \dots}{ All optional arguments available for the \code{\link{clusplot.default}} function (except for the \code{diss} one) and graphical parameters (see \code{\link{par}}) may also be supplied as arguments to this function.} } \section{Side Effects}{ An appropriate plot is produced on the current graphics device. This can be one or both of the following choices: \cr Clusplot \cr Silhouette plot } \details{ When \code{ask= TRUE}, rather than producing each plot sequentially, \code{plot.partition} displays a menu listing all the plots that can be produced. If the menu is not desired but a pause between plots is still wanted, call \code{par(ask= TRUE)} before invoking the plot command. The \emph{clusplot} of a cluster partition consists of a two-dimensional representation of the observations, in which the clusters are indicated by ellipses (see \code{\link{clusplot.partition}} for more details). The \emph{silhouette plot} of a nonhierarchical clustering is fully described in Rousseeuw (1987) and in chapter 2 of Kaufman and Rousseeuw (1990). For each observation i, a bar is drawn, representing its silhouette width s(i), see \code{\link{silhouette}} for details. Observations are grouped per cluster, starting with cluster 1 at the top. 
Observations with a large s(i) (almost 1) are very well clustered, a small s(i) (around 0) means that the observation lies between two clusters, and observations with a negative s(i) are probably placed in the wrong cluster. A clustering can be performed for several values of \code{k} (the number of clusters). Finally, choose the value of \code{k} with the largest overall average silhouette width. } \note{ In the silhouette plot, observation labels are only printed when the number of observations is less than \code{nmax.lab} (40, by default), for readability. Moreover, observation labels are truncated to maximally \code{max.strlen} (5) characters. \cr For more flexibility, use \code{plot(silhouette(x), ...)}, see \code{\link{plot.silhouette}}. } \references{ Rousseeuw, P.J. (1987) Silhouettes: A graphical aid to the interpretation and validation of cluster analysis. \emph{J. Comput. Appl. Math.}, \bold{20}, 53--65. Further, the references in \code{\link{plot.agnes}}. } \seealso{ \code{\link{partition.object}}, \code{\link{clusplot.partition}}, \code{\link{clusplot.default}}, \code{\link{pam}}, \code{\link{pam.object}}, \code{\link{clara}}, \code{\link{clara.object}}, \code{\link{fanny}}, \code{\link{fanny.object}}, \code{\link{par}}. } \examples{ ## generate 25 objects, divided into 2 clusters. x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)), cbind(rnorm(15,5,0.5), rnorm(15,5,0.5))) plot(pam(x, 2)) ## Save space not keeping data in clus.object, and still clusplot() it: data(xclara) cx <- clara(xclara, 3, keep.data = FALSE) cx$data # is NULL plot(cx, data = xclara) } \keyword{cluster} \keyword{hplot} % Converted by Sd2Rd version 0.3-2. 
cluster/man/plantTraits.Rd0000644000175100001440000001075610631261142015325 0ustar hornikusers\name{plantTraits} \alias{plantTraits} \title{Plant Species Traits Data} \docType{data} \encoding{latin1} \description{ This dataset constitutes a description of 136 plant species according to biological attributes (morphological or reproductive) } \usage{data(plantTraits) } \format{ A data frame with 136 observations on the following 31 variables. \describe{ \item{\code{pdias}}{Diaspore mass (mg)} \item{\code{longindex}}{Seed bank longevity} \item{\code{durflow}}{Flowering duration} \item{\code{height}}{Plant height, an ordered factor with levels \code{1} < \code{2} < \dots < \code{8}.} % Plant height}{an ordered factor with levels \code{1} <10cm < \code{2} 10-30cm< \code{3} 30-60cm< \code{4}60-100cm < \code{5}1-3m < \code{6}3-6m < \code{7}:6-15m < \code{8}>15m} \item{\code{begflow}}{Time of first flowering, an ordered factor with levels \code{1} < \code{2} < \code{3} < \code{4} < \code{5} < \code{6} < \code{7} < \code{8} < \code{9}} % {\code{begflow}}{an ordered factor with levels \code{1} january< \code{2} february< \code{3} march< \code{4}april < \code{5} may< \code{6} june< \code{7} july< \code{8}august < \code{9}september} \item{\code{mycor}}{Mycorrhizas, an ordered factor with levels \code{0}never < \code{1} sometimes< \code{2}always} \item{\code{vegaer}}{aerial vegetative propagation, an ordered factor with levels \code{0}never < \code{1} present but limited< \code{2}important.} \item{\code{vegsout}}{underground vegetative propagation, an ordered factor with 3 levels identical to \code{vegaer} above.} \item{\code{autopoll}}{selfing pollination, an ordered factor with levels \code{0}never < \code{1}rare < \code{2} often< the rule\code{3}} \item{\code{insects}}{insect pollination, an ordered factor with 5 levels \code{0} < \dots < \code{4}.} \item{\code{wind}}{wind pollination, an ordered factor with 5 levels \code{0} < \dots < \code{4}.} \item{\code{lign}}{a binary 
factor with levels \code{0:1}, indicating if plant is woody.}
    \item{\code{piq}}{a binary factor indicating if plant is thorny.}
    \item{\code{ros}}{a binary factor indicating if plant is rosette.}
    \item{\code{semiros}}{semi-rosette plant, a binary factor (\code{0}: no; \code{1}: yes).}
    \item{\code{leafy}}{leafy plant, a binary factor.}
    \item{\code{suman}}{summer annual, a binary factor.}
    \item{\code{winan}}{winter annual, a binary factor.}
    \item{\code{monocarp}}{monocarpic perennial, a binary factor.}
    \item{\code{polycarp}}{polycarpic perennial, a binary factor.}
    \item{\code{seasaes}}{seasonal aestival leaves, a binary factor.}
    \item{\code{seashiv}}{seasonal hibernal leaves, a binary factor.}
    \item{\code{seasver}}{seasonal vernal leaves, a binary factor.}
    \item{\code{everalw}}{leaves always evergreen, a binary factor.}
    \item{\code{everparti}}{leaves partially evergreen, a binary factor.}
    \item{\code{elaio}}{fruits with an elaiosome (dispersed by ants), a binary factor.}
    \item{\code{endozoo}}{endozoochorous fruits, a binary factor.}
    \item{\code{epizoo}}{epizoochorous fruits, a binary factor.}
    \item{\code{aquat}}{aquatic dispersal fruits, a binary factor.}
    \item{\code{windgl}}{wind dispersed fruits, a binary factor.}
    \item{\code{unsp}}{unspecialized mechanism of seed dispersal, a binary factor.}
  }
}
\details{
  Most of the factor attributes are not disjunctive.  For example, a
  plant can usually be pollinated by insects but sometimes
  self-pollination can occur.
}
\source{
  Vallet, Jeanne (2005)
  \emph{Structuration de communautés végétales et analyse comparative de
    traits biologiques le long d'un gradient d'urbanisation}.
Mémoire de Master 2 'Écologie-Biodiversité-Évolution'; Université Paris Sud XI,
  30p.+ annexes (in French)
}
% \references{
%  ~~ possibly secondary sources and usages ~~
% }
\examples{
data(plantTraits)

## Calculation of a dissimilarity matrix
library(cluster)
dai.b <- daisy(plantTraits,
               type = list(ordratio = 4:11, symm = 12:13, asymm = 14:31))

## Hierarchical classification
agn.trts <- agnes(dai.b, method="ward")
plot(agn.trts, which.plots = 2, cex= 0.6)
plot(agn.trts, which.plots = 1)
cutree6 <- cutree(agn.trts, k=6)
cutree6

## Principal Coordinate Analysis
cmdsdai.b <- cmdscale(dai.b, k=6)
plot(cmdsdai.b[, 1:2], asp = 1, col = cutree6)
}
\keyword{datasets}
% plant attribute database, mixed type variables, dissimilarity matrix (DAISY), Hierarchical Classification (AGNES)
% Principal Coordinates Analysis (CMDSCALE)
cluster/man/mona.object.Rd0000644000175100001440000000276107554320503015223 0ustar hornikusers\name{mona.object}
\alias{mona.object}
\title{Monothetic Analysis (MONA) Object}
\description{
  The objects of class \code{"mona"} represent the divisive
  hierarchical clustering of a dataset with only binary variables
  (measurements).  This class of objects is returned from \code{\link{mona}}.
}
\section{METHODS}{
  The \code{"mona"} class has methods for the following generic
  functions: \code{print}, \code{summary}, \code{plot}.
}
\value{
  A legitimate \code{mona} object is a list with the following components:
  \item{data}{
    matrix with the same dimensions as the original data matrix, but
    with factors coded as 0 and 1, and all missing values replaced.
  }
  \item{order}{
    a vector giving a permutation of the original observations to allow
    for plotting, in the sense that the branches of a clustering tree
    will not cross.
  }
  \item{order.lab}{
    a vector similar to \code{order}, but containing observation labels
    instead of observation numbers.  This component is only available if
    the original observations were labelled.
} \item{variable}{ vector of length n-1 where n is the number of observations, specifying the variables used to separate the observations of \code{order}. } \item{step}{ vector of length n-1 where n is the number of observations, specifying the separation steps at which the observations of \code{order} are separated. } } \seealso{\code{\link{mona}} for examples etc, \code{\link{plot.mona}}. } \keyword{cluster} cluster/man/chorSub.Rd0000644000175100001440000000165610417224251014426 0ustar hornikusers\name{chorSub} \alias{chorSub} \docType{data} \title{Subset of C-horizon of Kola Data} \description{ This is a small rounded subset of the C-horizon data \code{\link[mvoutlier]{chorizon}} from package \pkg{mvoutlier}. } \usage{data(chorSub)} \format{ A data frame with 61 observations on 10 variables. The variables contain scaled concentrations of chemical elements. } \details{ This data set was produced from \code{chorizon} via these statements: \preformatted{ data(chorizon, package = "mvoutlier") chorSub <- round(100*scale(chorizon[,101:110]))[190:250,] storage.mode(chorSub) <- "integer" colnames(chorSub) <- gsub("_.*", '', colnames(chorSub)) } } \source{Kola Project (1993-1998) } \seealso{ \code{\link[mvoutlier]{chorizon}} in package \pkg{mvoutlier} and other Kola data in the same package. } \examples{ data(chorSub) summary(chorSub) pairs(chorSub, gap= .1)# some outliers } \keyword{datasets} cluster/man/pluton.Rd0000644000175100001440000000331612456276724014356 0ustar hornikusers\name{pluton} \alias{pluton} \title{Isotopic Composition Plutonium Batches} \usage{data(pluton)} \description{ The \code{pluton} data frame has 45 rows and 4 columns, containing percentages of isotopic composition of 45 Plutonium batches. 
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{Pu238}{the percentages of \eqn{\ ^{238}Pu}{(238)Pu},
      always less than 2 percent.}
    \item{Pu239}{the percentages of \eqn{\ ^{239}Pu}{(239)Pu},
      typically between 60 and 80 percent (from neutron capture of Uranium,
      \eqn{\ ^{238}U}{(238)U}).}
    \item{Pu240}{percentage of the plutonium 240 isotope.}
    \item{Pu241}{percentage of the plutonium 241 isotope.}
  }
}
\details{
  Note that the percentage of plutonium~242 can be computed from the
  other four percentages, see the examples.

  In the reference below it is explained why it is very desirable to
  combine these plutonium batches in three groups of similar size.
}
\source{
  Available as \file{pluton.dat} from the archive of the University of
  Antwerpen,
  %% originally at
  %% \url{http://win-www.uia.ac.be/u/statis/datasets/clusplot-examples.tar.gz},
  %% no longer: Jan.2015:
  %% currently \url{http://www.agoras.ua.ac.be/datasets/clusplot-examples.tar.gz}.
  \file{..../datasets/clusplot-examples.tar.gz}, no longer available.
}
\references{
  Rousseeuw, P.J. and Kaufman, L. and Trauwaert, E. (1996)
  Fuzzy clustering using scatter matrices,
  \emph{Computational Statistics and Data Analysis} \bold{23}(1), 135--151.
}
\examples{
data(pluton)

hist(apply(pluton,1,sum), col = "gray") # between 94\% and 100\%
pu5 <- pluton
pu5$Pu242 <- 100 - apply(pluton,1,sum) # the remaining isotope.
pairs(pu5)
}
\keyword{datasets}
cluster/man/lower.to.upper.tri.inds.Rd0000644000175100001440000000174512422156450017457 0ustar hornikusers\name{lower.to.upper.tri.inds}
\alias{lower.to.upper.tri.inds}
\alias{upper.to.lower.tri.inds}
\title{Permute Indices for Triangular Matrices}
\description{
  Compute index vectors for extracting or reordering of lower or upper
  triangular matrices that are stored as contiguous vectors.
} \usage{ lower.to.upper.tri.inds(n) upper.to.lower.tri.inds(n) } \arguments{ \item{n}{integer larger than 1.} } \value{ integer vector containing a permutation of \code{1:N} where \eqn{N = n(n-1)/2}. } \seealso{\code{\link{upper.tri}}, \code{\link{lower.tri}} with a related purpose.} \examples{ m5 <- matrix(NA,5,5) m <- m5; m[lower.tri(m)] <- upper.to.lower.tri.inds(5); m m <- m5; m[upper.tri(m)] <- lower.to.upper.tri.inds(5); m stopifnot(lower.to.upper.tri.inds(2) == 1, lower.to.upper.tri.inds(3) == 1:3, upper.to.lower.tri.inds(3) == 1:3, sort(upper.to.lower.tri.inds(5)) == 1:10, sort(lower.to.upper.tri.inds(6)) == 1:15) } \keyword{array} \keyword{utilities} cluster/man/clusplot.partition.Rd0000644000175100001440000000542211573377221016703 0ustar hornikusers\name{clusplot} \alias{clusplot} \alias{clusplot.partition} \title{Bivariate Cluster Plot (of a Partitioning Object)} \description{ Draws a 2-dimensional \dQuote{clusplot} (clustering plot) on the current graphics device. The generic function has a default and a \code{partition} method. } \usage{ clusplot(x, \dots) \method{clusplot}{partition}(x, main = NULL, dist = NULL, \dots) } \arguments{ \item{x}{an \R object, here, specifically an object of class \code{"partition"}, e.g. created by one of the functions \code{\link{pam}}, \code{\link{clara}}, or \code{\link{fanny}}.} \item{main}{title for the plot; when \code{NULL} (by default), a title is constructed, using \code{x$call}.} \item{dist}{when \code{x} does not have a \code{diss} nor a \code{data} component, e.g., for \code{\link{pam}(dist(*), keep.diss=FALSE)}, \code{dist} must specify the dissimilarity for the clusplot.} \item{\dots}{optional arguments passed to methods, notably the \code{\link{clusplot.default}} method (except for the \code{diss} one) may also be supplied to this function. 
Many graphical parameters (see \code{\link{par}}) may also be supplied as arguments here.} } \section{Side Effects}{ a 2-dimensional clusplot is created on the current graphics device. } \value{ For the \code{partition} (and \code{default}) method: An invisible list with components \code{Distances} and \code{Shading}, as for \code{\link{clusplot.default}}, see there. } \details{ The \code{clusplot.partition()} method relies on \code{\link{clusplot.default}}. If the clustering algorithms \code{pam}, \code{fanny} and \code{clara} are applied to a data matrix of observations-by-variables then a clusplot of the resulting clustering can always be drawn. When the data matrix contains missing values and the clustering is performed with \code{\link{pam}} or \code{\link{fanny}}, the dissimilarity matrix will be given as input to \code{clusplot}. When the clustering algorithm \code{\link{clara}} was applied to a data matrix with NAs then clusplot will replace the missing values as described in \code{\link{clusplot.default}}, because a dissimilarity matrix is not available. } \seealso{\code{\link{clusplot.default}} for references; \code{\link{partition.object}}, \code{\link{pam}}, \code{\link{pam.object}}, \code{\link{clara}}, \code{\link{clara.object}}, \code{\link{fanny}}, \code{\link{fanny.object}}, \code{\link{par}}. } \examples{ ## For more, see ?clusplot.default ## generate 25 objects, divided into 2 clusters. 
x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)), cbind(rnorm(15,5,0.5), rnorm(15,5,0.5))) clusplot(pam(x, 2)) ## add noise, and try again : x4 <- cbind(x, rnorm(25), rnorm(25)) clusplot(pam(x4, 2)) } \keyword{cluster} \keyword{hplot} cluster/man/summary.mona.Rd0000644000175100001440000000075510370161217015446 0ustar hornikusers\name{summary.mona} \alias{summary.mona} \alias{print.summary.mona} \title{Summary Method for `mona' Objects} \description{Returns (and prints) a summary list for a \code{mona} object.} \usage{ \method{summary}{mona}(object, \dots) \method{print}{summary.mona}(x, \dots) } \arguments{ \item{x, object}{a \code{\link{mona}} object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{\code{\link{mona}}, \code{\link{mona.object}}.} \keyword{cluster} \keyword{print} cluster/man/agnes.object.Rd0000644000175100001440000000641312155364050015362 0ustar hornikusers\name{agnes.object} \alias{agnes.object} \title{Agglomerative Nesting (AGNES) Object} \description{ The objects of class \code{"agnes"} represent an agglomerative hierarchical clustering of a dataset. } \section{GENERATION}{ This class of objects is returned from \code{\link{agnes}}. } \section{METHODS}{ The \code{"agnes"} class has methods for the following generic functions: \code{print}, \code{summary}, \code{plot}, and \code{\link{as.dendrogram}}. In addition, \code{\link{cutree}(x, *)} can be used to \dQuote{cut} the dendrogram in order to produce cluster assignments. } \section{INHERITANCE}{ The class \code{"agnes"} inherits from \code{"twins"}. Therefore, the generic functions \code{\link{pltree}} and \code{\link{as.hclust}} are available for \code{agnes} objects. After applying \code{as.hclust()}, all \emph{its} methods are available, of course. 
} \value{ A legitimate \code{agnes} object is a list with the following components: \item{order}{ a vector giving a permutation of the original observations to allow for plotting, in the sense that the branches of a clustering tree will not cross.} \item{order.lab}{ a vector similar to \code{order}, but containing observation labels instead of observation numbers. This component is only available if the original observations were labelled. } \item{height}{ a vector with the distances between merging clusters at the successive stages. } \item{ac}{ the agglomerative coefficient, measuring the clustering structure of the dataset. For each observation i, denote by m(i) its dissimilarity to the first cluster it is merged with, divided by the dissimilarity of the merger in the final step of the algorithm. The \code{ac} is the average of all 1 - m(i). It can also be seen as the average width (or the percentage filled) of the banner plot. Because \code{ac} grows with the number of observations, this measure should not be used to compare datasets of very different sizes. } \item{merge}{ an (n-1) by 2 matrix, where n is the number of observations. Row i of \code{merge} describes the merging of clusters at step i of the clustering. If a number j in the row is negative, then the single observation |j| is merged at this stage. If j is positive, then the merger is with the cluster formed at stage j of the algorithm. } \item{diss}{ an object of class \code{"dissimilarity"} (see \code{\link{dissimilarity.object}}), representing the total dissimilarity matrix of the dataset. } \item{data}{ a matrix containing the original or standardized measurements, depending on the \code{stand} option of the function \code{agnes}. If a dissimilarity matrix was given as input structure, then this component is not available. } } \seealso{ \code{\link{agnes}}, \code{\link{diana}}, \code{\link{as.hclust}}, \code{\link{hclust}}, \code{\link{plot.agnes}}, \code{\link{twins.object}}. 
\code{\link{cutree}}. } \examples{ data(agriculture) ag.ag <- agnes(agriculture) class(ag.ag) pltree(ag.ag) # the dendrogram ## cut the dendrogram -> get cluster assignments: (ck3 <- cutree(ag.ag, k = 3)) (ch6 <- cutree(as.hclust(ag.ag), h = 6)) stopifnot(identical(unname(ch6), ck3)) } \keyword{cluster} cluster/man/bannerplot.Rd0000644000175100001440000000575210367701315015173 0ustar hornikusers\name{bannerplot} \alias{bannerplot} \title{Plot Banner (of Hierarchical Clustering)} \description{ Draws a \dQuote{banner}, i.e. basically a horizontal \code{\link{barplot}} visualizing the (agglomerative or divisive) hierarchical clustering or an other binary dendrogram structure. } \usage{ bannerplot(x, w = rev(x$height), fromLeft = TRUE, main=NULL, sub=NULL, xlab = "Height", adj = 0, col = c(2, 0), border = 0, axes = TRUE, frame.plot = axes, rev.xax = !fromLeft, xax.pretty = TRUE, labels = NULL, nmax.lab = 35, max.strlen = 5, yax.do = axes && length(x$order) <= nmax.lab, yaxRight = fromLeft, y.mar = 2.4 + max.strlen/2.5, \dots) } \arguments{ \item{x}{a list with components \code{order}, \code{order.lab} and \code{height} when \code{w}, the next argument is not specified.} \item{w}{non-negative numeric vector of bar widths.} \item{fromLeft}{logical, indicating if the banner is from the left or not.} \item{main,sub}{main and sub titles, see \code{\link{title}}.} \item{xlab}{x axis label (with \sQuote{correct} default e.g. 
for \code{plot.agnes}).} \item{adj}{passed to \code{\link{title}(main,sub)} for string adjustment.} \item{col}{vector of length 2, for two horizontal segments.} \item{border}{color for bar border; now defaults to background (no border).} \item{axes}{logical indicating if axes (and labels) should be drawn at all.} \item{frame.plot}{logical indicating the banner should be framed; mainly used when \code{border = 0} (as per default).} \item{rev.xax}{logical indicating if the x axis should be reversed (as in \code{plot.diana}).} \item{xax.pretty}{logical or integer indicating if \code{\link{pretty}()} should be used for the x axis. \code{xax.pretty = FALSE} is mainly for back compatibility.} \item{labels}{labels to use on y-axis; the default is constructed from \code{x}.} \item{nmax.lab}{integer indicating the number of labels which is considered too large for single-name labelling the banner plot.} \item{max.strlen}{positive integer giving the length to which strings are truncated in banner plot labeling.} \item{yax.do}{logical indicating if a y axis and banner labels should be drawn.} \item{yaxRight}{logical indicating if the y axis is on the right or left.} \item{y.mar}{positive number specifying the margin width to use when banners are labeled (along a y-axis). The default adapts to the string width and optimally would also dependend on the font.} \item{\dots}{graphical parameters (see \code{\link{par}}) may also be supplied as arguments to this function.} } \author{Martin Maechler (from original code of Kaufman and Rousseeuw).} \note{This is mainly a utility called from \code{\link{plot.agnes}}, \code{\link{plot.diana}} and \code{\link{plot.mona}}. 
}% also serves as \seealso{*}
\examples{
data(agriculture)
bannerplot(agnes(agriculture), main = "Bannerplot")
}
\keyword{hplot}
\keyword{cluster}
\keyword{utilities}
cluster/man/plot.mona.Rd0000644000175100001440000000406010370161217014720 0ustar hornikusers\name{plot.mona}
\alias{plot.mona}
\title{Banner of Monothetic Divisive Hierarchical Clusterings}
\description{
  Creates the banner of a \code{mona} object.
}
\usage{
\method{plot}{mona}(x, main = paste("Banner of ", deparse(x$call)),
          sub = NULL, xlab = "Separation step",
          col = c(2,0), axes = TRUE, adj = 0,
          nmax.lab = 35, max.strlen = 5, \dots)
}
\arguments{
  \item{x}{an object of class \code{"mona"}, typically created by
    \code{\link{mona}(.)}.}
  \item{main,sub}{main and sub titles for the plot, with convenient
    defaults.  See documentation in \code{\link{plot.default}}.}
  \item{xlab}{x axis label, see \code{\link{title}}.}
  \item{col,adj}{graphical parameters passed to \code{\link{bannerplot}()}.}
  \item{axes}{logical, indicating if (labeled) axes should be drawn.}
  \item{nmax.lab}{integer indicating the number of labels which is
    considered too large for labeling.}
  \item{max.strlen}{positive integer giving the length to which strings are
    truncated in labeling.}
  \item{\dots}{further graphical arguments are passed to
    \code{\link{bannerplot}()} and \code{\link{text}}.}
}
\section{Side Effects}{
  A banner is plotted on the current graphics device.
}
\details{
  Plots the separation step at which clusters are split.  The
  observations are given in the order found by the \code{mona}
  algorithm, the numbers in the \code{step} vector are represented as
  bars between the observations.

  When a long bar is drawn between two observations,
  those observations have the same value for each variable.
  See chapter 7 of Kaufman and Rousseeuw (1990).
}
\note{
  In the banner plot, observation labels are only printed when the
  number of observations is less than \code{nmax.lab} (35, by default),
  for readability.
Moreover, observation labels are truncated to maximally \code{max.strlen} (5) characters. } \references{see those in \code{\link{plot.agnes}}.} \seealso{ \code{\link{mona}}, \code{\link{mona.object}}, \code{\link{par}}. } \keyword{cluster} \keyword{hplot} % Converted by Sd2Rd version 0.3-2. cluster/man/agriculture.Rd0000644000175100001440000000274211711740461015350 0ustar hornikusers\name{agriculture} \alias{agriculture} \title{European Union Agricultural Workforces} \usage{data(agriculture)} \description{ Gross National Product (GNP) per capita and percentage of the population working in agriculture for each country belonging to the European Union in 1993. } \format{ A data frame with 12 observations on 2 variables: \tabular{rlll}{ [ , 1] \tab \code{x} \tab numeric \tab per capita GNP \cr [ , 2] \tab \code{y} \tab numeric \tab percentage in agriculture } The row names of the data frame indicate the countries. } \source{ Eurostat (European Statistical Agency, 1994): \emph{Cijfers en feiten: Een statistisch portret van de Europese Unie}. } \details{ The data seem to show two clusters, the \dQuote{more agricultural} one consisting of Greece, Portugal, Spain, and Ireland. } \seealso{\code{\link{agnes}}, \code{\link{daisy}}, \code{\link{diana}}. } \references{ see those in \code{\link{agnes}}. 
} \examples{ data(agriculture) ## Compute the dissimilarities using Euclidean metric and without ## standardization daisy(agriculture, metric = "euclidean", stand = FALSE) ## 2nd plot is similar to Figure 3 in Struyf et al (1996) plot(pam(agriculture, 2)) ## Plot similar to Figure 7 in Struyf et al (1996) \dontrun{plot(agnes(agriculture), ask = TRUE)} \dontshow{plot(agnes(agriculture))} ## Plot similar to Figure 8 in Struyf et al (1996) \dontrun{plot(diana(agriculture), ask = TRUE)} \dontshow{plot(diana(agriculture))} } \keyword{datasets} cluster/man/clara.object.Rd0000644000175100001440000000505210500041124015327 0ustar hornikusers\name{clara.object} \alias{clara.object} \title{Clustering Large Applications (CLARA) Object} \description{ The objects of class \code{"clara"} represent a partitioning of a large dataset into clusters and are typically returned from \code{\link{clara}}. } \section{Methods, Inheritance}{ The \code{"clara"} class has methods for the following generic functions: \code{print}, \code{summary}. The class \code{"clara"} inherits from \code{"partition"}. Therefore, the generic functions \code{plot} and \code{clusplot} can be used on a \code{clara} object. } \value{ A legitimate \code{clara} object is a list with the following components: \item{sample}{ labels or case numbers of the observations in the best sample, that is, the sample used by the \code{clara} algorithm for the final partition.} \item{medoids}{the medoids or representative objects of the clusters. It is a matrix with in each row the coordinates of one medoid. Possibly \code{NULL}, namely when the object resulted from \code{clara(*, medoids.x=FALSE)}. 
Use the following \code{i.med} in that case.} \item{i.med}{ the \emph{indices} of the \code{medoids} above: \code{medoids <- x[i.med,]} where \code{x} is the original data matrix in \code{clara(x,*)}.} \item{clustering}{the clustering vector, see \code{\link{partition.object}}.} \item{objective}{the objective function for the final clustering of the entire dataset.} \item{clusinfo}{ matrix, each row gives numerical information for one cluster. These are the cardinality of the cluster (number of observations), the maximal and average dissimilarity between the observations in the cluster and the cluster's medoid. %% FIXME: Now differs from pam.object.Rd: The last column is the maximal dissimilarity between the observations in the cluster and the cluster's medoid, divided by the minimal dissimilarity between the cluster's medoid and the medoid of any other cluster. If this ratio is small, the cluster is well-separated from the other clusters. } \item{diss}{dissimilarity (maybe NULL), see \code{\link{partition.object}}.} \item{silinfo}{list with silhouette width information for the best sample, see \code{\link{partition.object}}.} \item{call}{generating call, see \code{\link{partition.object}}.} \item{data}{matrix, possibibly standardized, or NULL, see \code{\link{partition.object}}.} } \seealso{ \code{\link{clara}}, \code{\link{dissimilarity.object}}, \code{\link{partition.object}}, \code{\link{plot.partition}}. } \keyword{cluster} cluster/man/silhouette.Rd0000644000175100001440000002110312307427016015177 0ustar hornikusers\name{silhouette} \alias{silhouette} \alias{silhouette.clara} \alias{silhouette.default} \alias{silhouette.partition} \alias{sortSilhouette} \alias{summary.silhouette} \alias{print.summary.silhouette} \alias{plot.silhouette} \title{Compute or Extract Silhouette Information from Clustering} \description{ Compute silhouette information according to a given clustering in \eqn{k} clusters. 
} \usage{ silhouette(x, \dots) \method{silhouette}{default} (x, dist, dmatrix, \dots) \method{silhouette}{partition}(x, \dots) \method{silhouette}{clara}(x, full = FALSE, \dots) sortSilhouette(object, \dots) \method{summary}{silhouette}(object, FUN = mean, \dots) \method{plot}{silhouette}(x, nmax.lab = 40, max.strlen = 5, main = NULL, sub = NULL, xlab = expression("Silhouette width "* s[i]), col = "gray", do.col.sort = length(col) > 1, border = 0, cex.names = par("cex.axis"), do.n.k = TRUE, do.clus.stat = TRUE, \dots) } \arguments{ \item{x}{an object of appropriate class; for the \code{default} method an integer vector with \eqn{k} different integer cluster codes or a list with such an \code{x$clustering} component. Note that silhouette statistics are only defined if \eqn{2 \le k \le n-1}{2 <= k <= n-1}.} \item{dist}{a dissimilarity object inheriting from class \code{\link{dist}} or coercible to one. If not specified, \code{dmatrix} must be.} \item{dmatrix}{a symmetric dissimilarity matrix (\eqn{n \times n}{n * n}), specified instead of \code{dist}, which can be more efficient.} \item{full}{logical specifying if a \emph{full} silhouette should be computed for \code{\link{clara}} object. 
Note that this requires \eqn{O(n^2)} memory, since the full dissimilarity (see \code{\link{daisy}}) is needed internally.} \item{object}{an object of class \code{silhouette}.} \item{\dots}{further arguments passed to and from methods.} \item{FUN}{function used to summarize silhouette widths.} \item{nmax.lab}{integer indicating the number of labels which is considered too large for single-name labeling the silhouette plot.} \item{max.strlen}{positive integer giving the length to which strings are truncated in silhouette plot labeling.} \item{main, sub, xlab}{arguments to \code{\link{title}}; have a sensible non-NULL default here.} \item{col, border, cex.names}{arguments passed \code{\link{barplot}()}; note that the default used to be \code{col = heat.colors(n), border = par("fg")} instead.\cr \code{col} can also be a color vector of length \eqn{k} for clusterwise coloring, see also \code{do.col.sort}: } \item{do.col.sort}{logical indicating if the colors \code{col} should be sorted \dQuote{along} the silhouette; this is useful for casewise or clusterwise coloring.} \item{do.n.k}{logical indicating if \eqn{n} and \eqn{k} \dQuote{title text} should be written.} \item{do.clus.stat}{logical indicating if cluster size and averages should be written right to the silhouettes.} } \details{ For each observation i, the \emph{silhouette width} \eqn{s(i)} is defined as follows: \cr Put a(i) = average dissimilarity between i and all other points of the cluster to which i belongs (if i is the \emph{only} observation in its cluster, \eqn{s(i) := 0} without further calculations). For all \emph{other} clusters C, put \eqn{d(i,C)} = average dissimilarity of i to all observations of C. The smallest of these \eqn{d(i,C)} is \eqn{b(i) := \min_C d(i,C)}, and can be seen as the dissimilarity between i and its \dQuote{neighbor} cluster, i.e., the nearest one to which it does \emph{not} belong. 
Finally, \deqn{s(i) := \frac{b(i) - a(i) }{max(a(i), b(i))}.}{% s(i) := ( b(i) - a(i) ) / max( a(i), b(i) ).} \code{silhouette.default()} is now based on C code donated by Romain Francois (the R version being still available as \code{cluster:::silhouette.default.R}). Observations with a large \eqn{s(i)} (almost 1) are very well clustered, a small \eqn{s(i)} (around 0) means that the observation lies between two clusters, and observations with a negative \eqn{s(i)} are probably placed in the wrong cluster. } \note{ While \code{silhouette()} is \emph{intrinsic} to the \code{\link{partition}} clusterings, and hence has a (trivial) method for these, it is straightforward to get silhouettes from hierarchical clusterings from \code{silhouette.default()} with \code{\link{cutree}()} and distance as input. By default, for \code{\link{clara}()} partitions, the silhouette is just for the best random \emph{subset} used. Use \code{full = TRUE} to compute (and later possibly plot) the full silhouette. } \value{ \code{silhouette()} returns an object, \code{sil}, of class \code{silhouette} which is an [n x 3] matrix with attributes. For each observation i, \code{sil[i,]} contains the cluster to which i belongs as well as the neighbor cluster of i (the cluster, not containing i, for which the average dissimilarity between its observations and i is minimal), and the silhouette width \eqn{s(i)} of the observation. The \code{\link{colnames}} correspondingly are \code{c("cluster", "neighbor", "sil_width")}. 
\code{summary(sil)} returns an object of class \code{summary.silhouette}, a list with components %%Rd bug: fails inside \value{}!:\describe{ \item{si.summary}{numerical \code{\link{summary}} of the individual silhouette widths \eqn{s(i)}.} \item{clus.avg.widths}{numeric (rank 1) array of clusterwise \emph{means} of silhouette widths where \code{mean = FUN} is used.} \item{avg.width}{the total mean \code{FUN(s)} where \code{s} are the individual silhouette widths.} \item{clus.sizes}{\code{\link{table}} of the \eqn{k} cluster sizes.} \item{call}{if available, the call creating \code{sil}.} \item{Ordered}{logical identical to \code{attr(sil, "Ordered")}, see below.} %%Rd bug: } \code{sortSilhouette(sil)} orders the rows of \code{sil} as in the silhouette plot, by cluster (increasingly) and decreasing silhouette width \eqn{s(i)}. \cr \code{attr(sil, "Ordered")} is a logical indicating if \code{sil} \emph{is} ordered as by \code{sortSilhouette()}. In that case, \code{rownames(sil)} will contain case labels or numbers, and \cr \code{attr(sil, "iOrd")} the ordering index vector. } \references{ Rousseeuw, P.J. (1987) Silhouettes: A graphical aid to the interpretation and validation of cluster analysis. \emph{J. Comput. Appl. Math.}, \bold{20}, 53--65. chapter 2 of Kaufman and Rousseeuw (1990), see the references in \code{\link{plot.agnes}}. } \seealso{\code{\link{partition.object}}, \code{\link{plot.partition}}. 
} \examples{ data(ruspini) pr4 <- pam(ruspini, 4) str(si <- silhouette(pr4)) (ssi <- summary(si)) plot(si) # silhouette plot plot(si, col = c("red", "green", "blue", "purple"))# with cluster-wise coloring si2 <- silhouette(pr4$clustering, dist(ruspini, "canberra")) summary(si2) # has small values: "canberra"'s fault plot(si2, nmax= 80, cex.names=0.6) op <- par(mfrow= c(3,2), oma= c(0,0, 3, 0), mgp= c(1.6,.8,0), mar= .1+c(4,2,2,2)) for(k in 2:6) plot(silhouette(pam(ruspini, k=k)), main = paste("k = ",k), do.n.k=FALSE) mtext("PAM(Ruspini) as in Kaufman & Rousseeuw, p.101", outer = TRUE, font = par("font.main"), cex = par("cex.main")); frame() ## the same with cluster-wise colours: c6 <- c("tomato", "forest green", "dark blue", "purple2", "goldenrod4", "gray20") for(k in 2:6) plot(silhouette(pam(ruspini, k=k)), main = paste("k = ",k), do.n.k=FALSE, col = c6[1:k]) par(op) ## clara(): standard silhouette is just for the best random subset data(xclara) set.seed(7) str(xc1k <- xclara[sample(nrow(xclara), size = 1000) ,]) cl3 <- clara(xc1k, 3) plot(silhouette(cl3))# only of the "best" subset of 46 ## The full silhouette: internally needs large (36 MB) dist object: sf <- silhouette(cl3, full = TRUE) ## this is the same as s.full <- silhouette(cl3$clustering, daisy(xc1k)) if(paste(R.version$major, R.version$minor, sep=".") >= "2.3.0") stopifnot(all.equal(sf, s.full, check.attributes = FALSE, tolerance = 0)) ## color dependent on original "3 groups of each 1000": plot(sf, col = 2+ as.integer(names(cl3$clustering) ) \%/\% 1000, main ="plot(silhouette(clara(.), full = TRUE))") ## Silhouette for a hierarchical clustering: ar <- agnes(ruspini) si3 <- silhouette(cutree(ar, k = 5), # k = 4 gave the same as pam() above daisy(ruspini)) plot(si3, nmax = 80, cex.names = 0.5) ## 2 groups: Agnes() wasn't too good: si4 <- silhouette(cutree(ar, k = 2), daisy(ruspini)) plot(si4, nmax = 80, cex.names = 0.5) } \keyword{cluster} 
cluster/man/cluster-internal.Rd0000644000175100001440000000033507420040427016306 0ustar hornikusers\name{cluster-internal} \alias{meanabsdev} \title{Internal cluster functions} \description{ Internal cluster functions. } \usage{ meanabsdev(y) } \details{ These are not to be called by the user. } \keyword{internal} cluster/man/print.pam.Rd0000644000175100001440000000110410073061404014712 0ustar hornikusers\name{print.pam} \alias{print.pam} \title{Print Method for PAM Objects} \description{ Prints the medoids, clustering vector and objective function of a \code{pam} object. This is a method for the function \code{\link{print}()} for objects inheriting from class \code{\link{pam}}. } \usage{ \method{print}{pam}(x, \dots) } \arguments{ \item{x}{a pam object.} \item{\dots}{potential further arguments (required by generic).} } \seealso{ \code{\link{pam}}, \code{\link{pam.object}}, \code{\link{print}}, \code{\link{print.default}}. } \keyword{cluster} \keyword{print} cluster/man/daisy.Rd0000644000175100001440000002310412236154412014124 0ustar hornikusers\name{daisy} \alias{daisy} \title{Dissimilarity Matrix Calculation} \concept{Gower's formula} \concept{Gower's distance} \concept{Gower's coefficient}% FIXME: see ../TODO-MM \description{ Compute all the pairwise dissimilarities (distances) between observations in the data set. The original variables may be of mixed types. In that case, or whenever \code{metric = "gower"} is set, a generalization of Gower's formula is used, see \sQuote{Details} below. } \usage{ daisy(x, metric = c("euclidean", "manhattan", "gower"), stand = FALSE, type = list(), weights = rep.int(1, p)) } \arguments{ \item{x}{ numeric matrix or data frame, of dimension \eqn{n\times p}{n x p}, say. Dissimilarities will be computed between the rows of \code{x}. Columns of mode \code{numeric} (i.e.
all columns when \code{x} is a matrix) will be recognized as interval scaled variables, columns of class \code{factor} will be recognized as nominal variables, and columns of class \code{ordered} will be recognized as ordinal variables. Other variable types should be specified with the \code{type} argument. Missing values (\code{\link{NA}}s) are allowed. } \item{metric}{ character string specifying the metric to be used. The currently available options are \code{"euclidean"} (the default), \code{"manhattan"} and \code{"gower"}.\cr Euclidean distances are root sum-of-squares of differences, and manhattan distances are the sum of absolute differences. \dQuote{Gower's distance} is chosen by metric \code{"gower"} or automatically if some columns of \code{x} are not numeric. Also known as Gower's coefficient (1971), expressed as a dissimilarity, this implies that a particular standardisation will be applied to each variable, and the \dQuote{distance} between two units is the sum of all the variable-specific distances, see the details section. } \item{stand}{logical flag: if TRUE, then the measurements in \code{x} are standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation. If not all columns of \code{x} are numeric, \code{stand} will be ignored and Gower's standardization (based on the \code{\link{range}}) will be applied in any case, see argument \code{metric}, above, and the details section. } \item{type}{list for specifying some (or all) of the types of the variables (columns) in \code{x}. The list may contain the following components: \code{"ordratio"} (ratio scaled variables to be treated as ordinal variables), \code{"logratio"} (ratio scaled variables that must be logarithmically transformed), \code{"asymm"} (asymmetric binary) and \code{"symm"} (symmetric binary variables). 
Each component's value is a vector, containing the names or the numbers of the corresponding columns of \code{x}. Variables not mentioned in the \code{type} list are interpreted as usual (see argument \code{x}). } \item{weights}{an optional numeric vector of length \eqn{p}(=\code{ncol(x)}); to be used in \dQuote{case 2} (mixed variables, or \code{metric = "gower"}), specifying a weight for each variable (\code{x[,k]}) instead of \eqn{1} in Gower's original formula.} }% end{arg..} \value{ an object of class \code{"dissimilarity"} containing the dissimilarities among the rows of \code{x}. This is typically the input for the functions \code{pam}, \code{fanny}, \code{agnes} or \code{diana}. For more details, see \code{\link{dissimilarity.object}}. } \details{ The original version of \code{daisy} is fully described in chapter 1 of Kaufman and Rousseeuw (1990). Compared to \code{\link{dist}} whose input must be numeric variables, the main feature of \code{daisy} is its ability to handle other variable types as well (e.g. nominal, ordinal, (a)symmetric binary) even when different types occur in the same data set. The handling of nominal, ordinal, and (a)symmetric binary data is achieved by using the general dissimilarity coefficient of Gower (1971). If \code{x} contains any columns of these data-types, both arguments \code{metric} and \code{stand} will be ignored and Gower's coefficient will be used as the metric. This can also be activated for purely numeric data by \code{metric = "gower"}. With that, each variable (column) is first standardized by dividing each entry by the range of the corresponding variable, after subtracting the minimum value; consequently the rescaled variable has range \eqn{[0,1]}, exactly. 
%% FIXME: Use something like "gowerRob" which uses *robust* rescaling Note that setting the type to \code{symm} (symmetric binary) gives the same dissimilarities as using \emph{nominal} (which is chosen for non-ordered factors) only when no missing values are present, and more efficiently. Note that \code{daisy} now gives a warning when 2-valued numerical variables do not have an explicit \code{type} specified, because the reference authors recommend to consider using \code{"asymm"}. In the \code{daisy} algorithm, missing values in a row of x are not included in the dissimilarities involving that row. There are two main cases, \enumerate{ \item If all variables are interval scaled (and \code{metric} is \emph{not} \code{"gower"}), the metric is "euclidean", and \eqn{n_g} is the number of columns in which neither row i nor j has NAs, then the dissimilarity d(i,j) returned is \eqn{\sqrt{p/n_g}}{sqrt(p/n_g)} (\eqn{p=}ncol(x)) times the Euclidean distance between the two vectors of length \eqn{n_g} shortened to exclude NAs. The rule is similar for the "manhattan" metric, except that the coefficient is \eqn{p/n_g}. If \eqn{n_g = 0}, the dissimilarity is NA. \item When some variables have a type other than interval scaled, or if \code{metric = "gower"} is specified, the dissimilarity between two rows is the weighted mean of the contributions of each variable. Specifically, \deqn{d_{ij} = d(i,j) = \frac{\sum_{k=1}^p w_k \delta_{ij}^{(k)} d_{ij}^{(k)}}{ \sum_{k=1}^p w_k \delta_{ij}^{(k)}}. }{d_ij = d(i,j) = sum(k=1:p; w_k delta(ij;k) d(ij,k)) / sum(k=1:p; w_k delta(ij;k)).} In other words, \eqn{d_{ij}}{d_ij} is a weighted mean of \eqn{d_{ij}^{(k)}}{d(ij,k)} with weights \eqn{w_k \delta_{ij}^{(k)}}{w_k delta(ij;k)}, where \eqn{w_k}\code{= weights[k]}, \eqn{\delta_{ij}^{(k)}}{delta(ij;k)} is 0 or 1, and \eqn{d_{ij}^{(k)}}{d(ij,k)}, the k-th variable contribution to the total distance, is a distance between \code{x[i,k]} and \code{x[j,k]}, see below.
The 0-1 weight \eqn{\delta_{ij}^{(k)}}{delta(ij;k)} becomes zero when the variable \code{x[,k]} is missing in either or both rows (i and j), or when the variable is asymmetric binary and both values are zero. In all other situations it is 1. The contribution \eqn{d_{ij}^{(k)}}{d(ij,k)} of a nominal or binary variable to the total dissimilarity is 0 if both values are equal, 1 otherwise. The contribution of other variables is the absolute difference of both values, divided by the total range of that variable. Note that \dQuote{standard scoring} is applied to ordinal variables, i.e., they are replaced by their integer codes \code{1:K}. Note that this is not the same as using their ranks (since there typically are ties). % contrary to what Kaufman and Rousseeuw write in their book, and % the original help page. As the individual contributions \eqn{d_{ij}^{(k)}}{d(ij,k)} are in \eqn{[0,1]}, the dissimilarity \eqn{d_{ij}}{d_ij} will remain in this range. If all weights \eqn{w_k \delta_{ij}^{(k)}}{w_k delta(ij;k)} are zero, the dissimilarity is set to \code{\link{NA}}. } } \section{Background}{ Dissimilarities are used as inputs to cluster analysis and multidimensional scaling. The choice of metric may have a large impact. } \references{ Gower, J. C. (1971) A general coefficient of similarity and some of its properties, \emph{Biometrics} \bold{27}, 857--874. Kaufman, L. and Rousseeuw, P.J. (1990) \emph{Finding Groups in Data: An Introduction to Cluster Analysis}. Wiley, New York. Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997) Integrating Robust Clustering Techniques in S-PLUS, \emph{Computational Statistics and Data Analysis} \bold{26}, 17--37. } \author{ Anja Struyf, Mia Hubert, and Peter J. Rousseeuw, for the original version. \cr Martin Maechler improved the \code{\link{NA}} handling and \code{type} specification checking, and extended functionality to \code{metric = "gower"} and the optional \code{weights} argument.
} \seealso{ \code{\link{dissimilarity.object}}, \code{\link{dist}}, \code{\link{pam}}, \code{\link{fanny}}, \code{\link{clara}}, \code{\link{agnes}}, \code{\link{diana}}. } \examples{ data(agriculture) ## Example 1 in ref: ## Dissimilarities using Euclidean metric and without standardization d.agr <- daisy(agriculture, metric = "euclidean", stand = FALSE) d.agr as.matrix(d.agr)[,"DK"] # via as.matrix.dist(.) ## compare with as.matrix(daisy(agriculture, metric = "gower")) data(flower) ## Example 2 in ref summary(dfl1 <- daisy(flower, type = list(asymm = 3))) summary(dfl2 <- daisy(flower, type = list(asymm = c(1, 3), ordratio = 7))) ## this failed earlier: summary(dfl3 <- daisy(flower, type = list(asymm = c("V1", "V3"), symm= 2, ordratio= 7, logratio= 8))) } \keyword{cluster} cluster/man/partition.object.Rd0000644000175100001440000000544410370161217016276 0ustar hornikusers\name{partition.object} \alias{partition}% == class \alias{partition.object} \title{Partitioning Object} \description{ The objects of class \code{"partition"} represent a partitioning of a dataset into clusters. } \section{GENERATION}{ These objects are returned from \code{pam}, \code{clara} or \code{fanny}. } \section{METHODS}{ The \code{"partition"} class has a method for the following generic functions: \code{plot}, \code{clusplot}. } \section{INHERITANCE}{ The following classes inherit from class \code{"partition"} : \code{"pam"}, \code{"clara"} and \code{"fanny"}. See \code{\link{pam.object}}, \code{\link{clara.object}} and \code{\link{fanny.object}} for details. } \value{a \code{"partition"} object is a list with the following (and typically more) components: \item{clustering}{ the clustering vector. 
An integer vector of length \eqn{n}, the number of observations, giving for each observation the number (`id') of the cluster to which it belongs.} \item{call}{the matched \code{\link{call}} generating the object.} \item{silinfo}{ a list with all \emph{silhouette} information, only available when the number of clusters is non-trivial, i.e., \eqn{1 < k < n} and then has the following components, see \code{\link{silhouette}} \describe{ \item{widths}{an (n x 3) matrix, as returned by \code{\link{silhouette}()}, with for each observation i the cluster to which i belongs, as well as the neighbor cluster of i (the cluster, not containing i, for which the average dissimilarity between its observations and i is minimal), and the silhouette width \eqn{s(i)} of the observation. } \item{clus.avg.widths}{the average silhouette width per cluster.} \item{avg.width}{the average silhouette width for the dataset, i.e., simply the average of \eqn{s(i)} over all observations \eqn{i}.} }% describe This information is also needed to construct a \emph{silhouette plot} of the clustering, see \code{\link{plot.partition}}. Note that \code{avg.width} can be maximized over different clusterings (e.g. with varying number of clusters) to choose an \emph{optimal} clustering.%% see an example or a demo << FIXME >> } \item{objective}{value of criterion maximized during the partitioning algorithm, may have more than one entry for different stages.} \item{diss}{ an object of class \code{"dissimilarity"}, representing the total dissimilarity matrix of the dataset (or relevant subset, e.g. for \code{clara}). } \item{data}{ a matrix containing the original or standardized data. This might be missing to save memory or when a dissimilarity matrix was given as input structure to the clustering method. } } \seealso{\code{\link{pam}}, \code{\link{clara}}, \code{\link{fanny}}.
} \keyword{cluster} cluster/man/plot.diana.Rd0000644000175100001440000000662410631261142015050 0ustar hornikusers\name{plot.diana} %% almost identical to ./plot.agnes.Rd and quite similar to ./plot.mona.Rd \alias{plot.diana} \title{Plots of a Divisive Hierarchical Clustering} \description{ Creates plots for visualizing a \code{diana} object. } \usage{ \method{plot}{diana}(x, ask = FALSE, which.plots = NULL, main = NULL, sub = paste("Divisive Coefficient = ", round(x$dc, digits = 2)), adj = 0, nmax.lab = 35, max.strlen = 5, xax.pretty = TRUE, \dots) } \arguments{ \item{x}{an object of class \code{"diana"}, typically created by \code{\link{diana}(.)}.} \item{ask}{logical; if true and \code{which.plots} is \code{NULL}, \code{plot.diana} operates in interactive mode, via \code{\link{menu}}.} \item{which.plots}{integer vector or NULL (default), the latter producing both plots. Otherwise, \code{which.plots} must contain integers of \code{1} for a \emph{banner} plot or \code{2} for a dendrogram or ``clustering tree''.} \item{main, sub}{main and sub title for the plot, each with a convenient default. See documentation for these arguments in \code{\link{plot.default}}.} \item{adj}{for label adjustment in \code{\link{bannerplot}()}.} \item{nmax.lab}{integer indicating the number of labels which is considered too large for single-name labelling the banner plot.} \item{max.strlen}{positive integer giving the length to which strings are truncated in banner plot labeling.} \item{xax.pretty}{logical or integer indicating if \code{\link{pretty}(*, n = xax.pretty)} should be used for the x axis. \code{xax.pretty = FALSE} is for back compatibility.} \item{\dots}{graphical parameters (see \code{\link{par}}) may also be supplied and are passed to \code{\link{bannerplot}()} or \code{\link{pltree}()}, respectively.} } \section{Side Effects}{ An appropriate plot is produced on the current graphics device. 
This can be one or both of the following choices: \cr Banner \cr Clustering tree } \details{ When \code{ask = TRUE}, rather than producing each plot sequentially, \code{plot.diana} displays a menu listing all the plots that can be produced. If the menu is not desired but a pause between plots is still wanted one must set \code{par(ask= TRUE)} before invoking the plot command. The banner displays the hierarchy of clusters, and is equivalent to a tree. See Rousseeuw (1986) or chapter 6 of Kaufman and Rousseeuw (1990). The banner plots the diameter of each cluster being split. The observations are listed in the order found by the \code{diana} algorithm, and the numbers in the \code{height} vector are represented as bars between the observations. The leaves of the clustering tree are the original observations. A branch splits up at the diameter of the cluster being split. } \note{ In the banner plot, observation labels are only printed when the number of observations is limited, i.e., less than \code{nmax.lab} (35, by default), for readability. Moreover, observation labels are truncated to maximally \code{max.strlen} (5) characters. } \references{see those in \code{\link{plot.agnes}}.} \seealso{ \code{\link{diana}}, \code{\link{diana.object}}, \code{\link{twins.object}}, \code{\link{par}}. } \examples{ example(diana)# -> dv <- diana(....) plot(dv, which = 1, nmax.lab = 100) ## wider labels : op <- par(mar = par("mar") + c(0, 2, 0,0)) plot(dv, which = 1, nmax.lab = 100, max.strlen = 12) par(op) } \keyword{cluster} \keyword{hplot} cluster/man/pam.Rd0000644000175100001440000001775312307427016013605 0ustar hornikusers\name{pam} \alias{pam} \title{Partitioning Around Medoids} \description{ Partitioning (clustering) of the data into \code{k} clusters \dQuote{around medoids}, a more robust version of K-means.
} \usage{ pam(x, k, diss = inherits(x, "dist"), metric = "euclidean", medoids = NULL, stand = FALSE, cluster.only = FALSE, do.swap = TRUE, keep.diss = !diss && !cluster.only && n < 100, keep.data = !diss && !cluster.only, pamonce = FALSE, trace.lev = 0) } \arguments{ \item{x}{ data matrix or data frame, or dissimilarity matrix or object, depending on the value of the \code{diss} argument. In case of a matrix or data frame, each row corresponds to an observation, and each column corresponds to a variable. All variables must be numeric. Missing values (\code{\link{NA}}s) \emph{are} allowed---as long as every pair of observations has at least one case not missing. In case of a dissimilarity matrix, \code{x} is typically the output of \code{\link{daisy}} or \code{\link{dist}}. Also a vector of length n*(n-1)/2 is allowed (where n is the number of observations), and will be interpreted in the same way as the output of the above-mentioned functions. Missing values (NAs) are \emph{not} allowed. } \item{k}{positive integer specifying the number of clusters, less than the number of observations.} \item{diss}{ logical flag: if TRUE (default for \code{dist} or \code{dissimilarity} objects), then \code{x} will be considered as a dissimilarity matrix. If FALSE, then \code{x} will be considered as a matrix of observations by variables. } \item{metric}{ character string specifying the metric to be used for calculating dissimilarities between observations.\cr The currently available options are "euclidean" and "manhattan". Euclidean distances are root sum-of-squares of differences, and manhattan distances are the sum of absolute differences. If \code{x} is already a dissimilarity matrix, then this argument will be ignored. 
} \item{medoids}{NULL (default) or length-\code{k} vector of integer indices (in \code{1:n}) specifying initial medoids instead of using the \sQuote{\emph{build}} algorithm.} \item{stand}{logical; if true, the measurements in \code{x} are standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation. If \code{x} is already a dissimilarity matrix, then this argument will be ignored.} \item{cluster.only}{logical; if true, only the clustering will be computed and returned, see details.} \item{do.swap}{logical indicating if the \bold{swap} phase should happen. The default, \code{TRUE}, corresponds to the original algorithm. On the other hand, the \bold{swap} phase is much more computer intensive than the \bold{build} one for large \eqn{n}, so can be skipped by \code{do.swap = FALSE}.} \item{keep.diss, keep.data}{logicals indicating if the dissimilarities and/or input data \code{x} should be kept in the result. Setting these to \code{FALSE} can give much smaller results and hence even save memory allocation \emph{time}.} \item{pamonce}{logical or integer in \code{0:2} specifying algorithmic short cuts as proposed by Reynolds et al. (2006), see below.} \item{trace.lev}{integer specifying a trace level for printing diagnostics during the build and swap phase of the algorithm. Default \code{0} does not print anything; higher values print increasingly more.} } \value{ an object of class \code{"pam"} representing the clustering. See \code{?\link{pam.object}} for details. } \details{ The basic \code{pam} algorithm is fully described in chapter 2 of Kaufman and Rousseeuw (1990).
Compared to the k-means approach in \code{kmeans}, the function \code{pam} has the following features: (a) it also accepts a dissimilarity matrix; (b) it is more robust because it minimizes a sum of dissimilarities instead of a sum of squared euclidean distances; (c) it provides a novel graphical display, the silhouette plot (see \code{plot.partition}) (d) it allows to select the number of clusters using \code{mean(\link{silhouette}(pr))} on the result \code{pr <- pam(..)}, or directly its component \code{pr$silinfo$avg.width}, see also \code{\link{pam.object}}. When \code{cluster.only} is true, the result is simply a (possibly named) integer vector specifying the clustering, i.e.,\cr \code{pam(x,k, cluster.only=TRUE)} is the same as \cr \code{pam(x,k)$clustering} but computed more efficiently. The \code{pam}-algorithm is based on the search for \code{k} representative objects or medoids among the observations of the dataset. These observations should represent the structure of the data. After finding a set of \code{k} medoids, \code{k} clusters are constructed by assigning each observation to the nearest medoid. The goal is to find \code{k} representative objects which minimize the sum of the dissimilarities of the observations to their closest representative object. \cr By default, when \code{medoids} are not specified, the algorithm first looks for a good initial set of medoids (this is called the \bold{build} phase). Then it finds a local minimum for the objective function, that is, a solution such that there is no single switch of an observation with a medoid that will decrease the objective (this is called the \bold{swap} phase). When the \code{medoids} are specified, their order does \emph{not} matter; in general, the algorithms have been designed to not depend on the order of the observations. The \code{pamonce} option, new in cluster 1.14.2 (Jan. 2012), has been proposed by Matthias Studer, University of Geneva, based on the findings by Reynolds et al. 
(2006). The default \code{FALSE} (or integer \code{0}) corresponds to the original \dQuote{swap} algorithm, whereas \code{pamonce = 1} (or \code{TRUE}), corresponds to the first proposal .... %% FIXME and \code{pamonce = 2} additionally implements the second proposal as well. % FIXME more details } \note{ For large datasets, \code{pam} may need too much memory or too much computation time since both are \eqn{O(n^2)}. Then, \code{\link{clara}()} is preferable, see its documentation. } \author{Kaufman and Rousseeuw's original Fortran code was translated to C and augmented in several ways, e.g. to allow \code{cluster.only=TRUE} or \code{do.swap=FALSE}, by Martin Maechler. \cr Matthias Studer, Univ.Geneva provided the \code{pamonce} implementation. } \references{ %% the pamonce options : Reynolds, A., Richards, G., de la Iglesia, B. and Rayward-Smith, V. (2006) Clustering rules: A comparison of partitioning and hierarchical clustering algorithms; \emph{Journal of Mathematical Modelling and Algorithms} \bold{5}, 475--504 (\url{http://dx.doi.org/10.1007/s10852-005-9022-1}). } \seealso{ \code{\link{agnes}} for background and references; \code{\link{pam.object}}, \code{\link{clara}}, \code{\link{daisy}}, \code{\link{partition.object}}, \code{\link{plot.partition}}, \code{\link{dist}}. } \examples{ ## generate 25 objects, divided into 2 clusters. x <- rbind(cbind(rnorm(10,0,0.5), rnorm(10,0,0.5)), cbind(rnorm(15,5,0.5), rnorm(15,5,0.5))) pamx <- pam(x, 2) pamx summary(pamx) plot(pamx) ## use obs. 1 & 16 as starting medoids -- same result (typically) (p2m <- pam(x, 2, medoids = c(1,16))) p3m <- pam(x, 3, trace = 2) ## rather stupid initial medoids: (p3m.
<- pam(x, 3, medoids = 3:1, trace = 1)) \dontshow{ ii <- pmatch(c("obj","call"), names(pamx)) stopifnot(all.equal(pamx [-ii], p2m [-ii], tolerance=1e-14), all.equal(pamx$objective[2], p2m$objective[2], tolerance=1e-14)) } pam(daisy(x, metric = "manhattan"), 2, diss = TRUE) data(ruspini) ## Plot similar to Figure 4 in Struyf et al (1996) \dontrun{plot(pam(ruspini, 4), ask = TRUE)} \dontshow{plot(pam(ruspini, 4))} } \keyword{cluster} cluster/man/print.mona.Rd0000644000175100001440000000112707351402102015073 0ustar hornikusers\name{print.mona} \alias{print.mona} \title{Print Method for MONA Objects} \description{ Prints the ordering of objects, separation steps, and used variables of a \code{mona} object. This is a method for the function \code{\link{print}()} for objects inheriting from class \code{\link{mona}}. } \usage{ \method{print}{mona}(x, \dots) } \arguments{ \item{x}{a mona object.} \item{\dots}{potential further arguments (required by generic).} } \seealso{ \code{\link{mona}}, \code{\link{mona.object}}, \code{\link{print}}, \code{\link{print.default}}. } \keyword{cluster} \keyword{print} cluster/man/volume.ellipsoid.Rd0000644000175100001440000000160710371143174016312 0ustar hornikusers\name{volume.ellipsoid} \alias{volume} \alias{volume.ellipsoid} \title{Compute the Volume of Planar Object} \description{ Compute the volume of a planar object. This is a generic function and a method for \code{ellipsoid} objects. } \usage{ \method{volume}{ellipsoid}(object) } \arguments{ \item{object}{an \R object the volume of which is wanted; for the \code{ellipsoid} method, an object of that class (see \code{\link{ellipsoidhull}} or the example below).} } \value{ a number, the volume of the given \code{object}. } \seealso{\code{\link{ellipsoidhull}} for spanning ellipsoid computation.} \examples{ ## example(ellipsoidhull) # which defines `ellipsoid' object myEl <- structure(list(cov = rbind(c(3,1),1:2), loc = c(0,0), d2 = 10), class = "ellipsoid") volume(myEl)# i.e.
"area" here (d = 2) myEl # also mentions the "volume" } \keyword{utilities} cluster/man/plot.agnes.Rd0000644000175100001440000001100711705523412015064 0ustar hornikusers\name{plot.agnes} %% almost identical to ./plot.diana.Rd and quite similar to ./plot.mona.Rd \alias{plot.agnes} \title{Plots of an Agglomerative Hierarchical Clustering} \description{ Creates plots for visualizing an \code{agnes} object. } \usage{ \method{plot}{agnes}(x, ask = FALSE, which.plots = NULL, main = NULL, sub = paste("Agglomerative Coefficient = ",round(x$ac, digits = 2)), adj = 0, nmax.lab = 35, max.strlen = 5, xax.pretty = TRUE, \dots) } \arguments{ \item{x}{an object of class \code{"agnes"}, typically created by \code{\link{agnes}(.)}.} \item{ask}{logical; if true and \code{which.plots} is \code{NULL}, \code{plot.agnes} operates in interactive mode, via \code{\link{menu}}.} \item{which.plots}{integer vector or NULL (default), the latter producing both plots. Otherwise, \code{which.plots} must contain integers of \code{1} for a \emph{banner} plot or \code{2} for a dendrogram or ``clustering tree''.} \item{main, sub}{main and sub title for the plot, with convenient defaults. See documentation for these arguments in \code{\link{plot.default}}.} \item{adj}{for label adjustment in \code{\link{bannerplot}()}.} \item{nmax.lab}{integer indicating the number of labels which is considered too large for single-name labelling the banner plot.} \item{max.strlen}{positive integer giving the length to which strings are truncated in banner plot labeling.} \item{xax.pretty}{logical or integer indicating if \code{\link{pretty}(*, n = xax.pretty)} should be used for the x axis. 
\code{xax.pretty = FALSE} is for back compatibility.} \item{\dots}{graphical parameters (see \code{\link{par}}) may also be supplied and are passed to \code{\link{bannerplot}()} or \code{pltree()} (see \code{\link{pltree.twins}}), respectively.} } \section{Side Effects}{ Appropriate plots are produced on the current graphics device. This can be one or both of the following choices: \cr Banner \cr Clustering tree } \details{ When \code{ask = TRUE}, rather than producing each plot sequentially, \code{plot.agnes} displays a menu listing all the plots that can be produced. If the menu is not desired but a pause between plots is still wanted one must set \code{par(ask= TRUE)} before invoking the plot command. The banner displays the hierarchy of clusters, and is equivalent to a tree. See Rousseeuw (1986) or chapter 5 of Kaufman and Rousseeuw (1990). The banner plots distances at which observations and clusters are merged. The observations are listed in the order found by the \code{agnes} algorithm, and the numbers in the \code{height} vector are represented as bars between the observations. The leaves of the clustering tree are the original observations. Two branches come together at the distance between the two clusters being merged. For more customization of the plots, rather call \code{\link{bannerplot}} and \code{pltree()}, i.e., its method \code{\link{pltree.twins}}, respectively. directly with corresponding arguments, e.g., \code{xlab} or \code{ylab}. } \note{ In the banner plot, observation labels are only printed when the number of observations is limited less than \code{nmax.lab} (35, by default), for readability. Moreover, observation labels are truncated to maximally \code{max.strlen} (5) characters. For the dendrogram, more flexibility than via \code{pltree()} is provided by \code{dg <- \link{as.dendrogram}(x)} and plotting \code{dg} via \code{\link[stats]{plot.dendrogram}}. } \references{ Kaufman, L. and Rousseeuw, P.J. 
(1990) \emph{Finding Groups in Data: An Introduction to Cluster Analysis}. Wiley, New York. Rousseeuw, P.J. (1986). A visual display for hierarchical classification, in \emph{Data Analysis and Informatics 4}; edited by E. Diday, Y. Escoufier, L. Lebart, J. Pages, Y. Schektman, and R. Tomassone. North-Holland, Amsterdam, 743--748. Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997) Integrating Robust Clustering Techniques in S-PLUS, \emph{Computational Statistics and Data Analysis}, \bold{26}, 17--37. } \seealso{ \code{\link{agnes}} and \code{\link{agnes.object}}; \code{\link{bannerplot}}, \code{\link{pltree.twins}}, and \code{\link{par}}. } \examples{ ## Can also pass `labels' to pltree() and bannerplot(): data(iris) cS <- as.character(Sp <- iris$Species) cS[Sp == "setosa"] <- "S" cS[Sp == "versicolor"] <- "V" cS[Sp == "virginica"] <- "g" ai <- agnes(iris[, 1:4]) plot(ai, labels = cS, nmax = 150)# bannerplot labels are mess } \keyword{cluster} \keyword{hplot} cluster/man/clusGap.Rd0000644000175100001440000002046212307650173014421 0ustar hornikusers\name{clusGap} \title{Gap Statistic for Estimating the Number of Clusters} \alias{clusGap} \alias{maxSE} \alias{print.clusGap} \alias{plot.clusGap} \description{ \code{clusGap()} calculates a goodness of clustering measure, the \dQuote{gap} statistic. For each number of clusters \eqn{k}, it compares \eqn{\log(W(k))} with \eqn{E^*[\log(W(k))]} where the latter is defined via bootstrapping, i.e. simulating from a reference distribution. \code{maxSE(f, SE.f)} determines the location of the \bold{maximum} of \code{f}, taking a \dQuote{1-SE rule} into account for the \code{*SE*} methods. The default method \code{"firstSEmax"} looks for the smallest \eqn{k} such that its value \eqn{f(k)} is not more than 1 standard error away from the first local maximum. 
This is similar but not the same as \code{"Tibs2001SEmax"}, Tibshirani et al's recommendation of determining the number of clusters from the gap statistics and their standard deviations. } \usage{ clusGap(x, FUNcluster, K.max, B = 100, verbose = interactive(), \dots) maxSE(f, SE.f, method = c("firstSEmax", "Tibs2001SEmax", "globalSEmax", "firstmax", "globalmax"), SE.factor = 1) \S3method{print}{clusGap}(x, method = "firstSEmax", SE.factor = 1, \dots) } \arguments{ \item{x}{numeric matrix or \code{\link{data.frame}}.} \item{FUNcluster}{a \code{\link{function}} which accepts as first argument a (data) matrix like \code{x}, second argument, say \eqn{k, k\geq 2}{k, k >= 2}, the number of clusters desired, and returns a \code{\link{list}} with a component named (or shortened to) \code{cluster} which is a vector of length \code{n = nrow(x)} of integers in \code{1:k} determining the clustering or grouping of the \code{n} observations.} \item{K.max}{the maximum number of clusters to consider, must be at least two.} \item{B}{integer, number of Monte Carlo (\dQuote{bootstrap}) samples.} \item{verbose}{integer or logical, determining if \dQuote{progress} output should be printed. The default prints one bit per bootstrap sample.} \item{\dots}{optionally further arguments for \code{FUNcluster()}, see \code{kmeans} example below.} \item{f}{numeric vector of \sQuote{function values}, of length \eqn{K}, whose (\dQuote{1 SE respected}) maximum we want.} \item{SE.f}{numeric vector of length \eqn{K} of standard errors of \code{f}.} \item{method}{character string indicating how the \dQuote{optimal} number of clusters, \eqn{\hat k}{k^}, is computed from the gap statistics (and their standard deviations), or more generally how the location \eqn{\hat k}{k^} of the maximum of \eqn{f_k}{f[k]} should be determined. 
%% -> ../R/clusGap.R \describe{ \item{\code{"globalmax"}:}{simply corresponds to the global maximum, i.e., is \code{which.max(f)}} \item{\code{"firstmax"}:}{gives the location of the first \emph{local} maximum.} \item{\code{"Tibs2001SEmax"}:}{uses the criterion, Tibshirani et al (2001) proposed: \dQuote{the smallest \eqn{k} such that \eqn{f(k) \ge f(k+1) - s_{k+1}}}. Note that this chooses \eqn{k = 1} when all standard deviations are larger than the differences \eqn{f(k+1) - f(k)}.} \item{\code{"firstSEmax"}:}{location of the first \eqn{f()} value which is not larger than the first \emph{local} maximum minus \code{SE.factor * SE.f[]}, i.e, within an \dQuote{f S.E.} range of that maximum (see also \code{SE.factor}).} \item{\code{"globalSEmax"}:}{(used in Dudoit and Fridlyand (2002), supposedly following Tibshirani's proposition): location of the first \eqn{f()} value which is not larger than the \emph{global} maximum minus \code{SE.factor * SE.f[]}, i.e, within an \dQuote{f S.E.} range of that maximum (see also \code{SE.factor}).} } See the examples for a comparison in a simple case. } \item{SE.factor}{[When \code{method} contains \code{"SE"}] Determining the optimal number of clusters, Tibshirani et al. proposed the \dQuote{1 S.E.}-rule. Using an \code{SE.factor} \eqn{f}, the \dQuote{f S.E.}-rule is used, more generally.} } \details{ The main result \code{$Tab[,"gap"]} of course is from bootstrapping aka Monte Carlo simulation and hence random, or equivalently, depending on the initial random seed (see \code{\link{set.seed}()}). On the other hand, in our experience, using \code{B = 500} gives quite precise results such that the gap plot is basically unchanged after an another run. 
} \value{ an object of S3 class \code{"clusGap"}, basically a list with components \item{Tab}{a matrix with \code{K.max} rows and 4 columns, named "logW", "E.logW", "gap", and "SE.sim", where \code{gap = E.logW - logW}, and \code{SE.sim} corresponds to the standard error of \code{gap}, \code{SE.sim[k]=}\eqn{s_k}{s[k]}, where \eqn{s_k := \sqrt{1 + 1/B} sd^*(gap_j)}{s[k] := sqrt(1 + 1/B) sd^*(gap[])}, and \eqn{sd^*()} is the standard deviation of the simulated (\dQuote{bootstrapped}) gap values. } \item{n}{number of observations, i.e., \code{nrow(x)}.} \item{B}{input \code{B}} \item{FUNcluster}{input function \code{FUNcluster}} } \references{ Tibshirani, R., Walther, G. and Hastie, T. (2001). Estimating the number of data clusters via the Gap statistic. \emph{Journal of the Royal Statistical Society B}, \bold{63}, 411--423. Tibshirani, R., Walther, G. and Hastie, T. (2000). Estimating the number of clusters in a dataset via the Gap statistic. Technical Report. Stanford. Per Broberg (2006). SAGx: Statistical Analysis of the GeneChip. R package version 1.9.7.% moved to Bioconductor sometime after 2006 \url{http://home.swipnet.se/pibroberg/expression_hemsida1.html} } \author{ This function is originally based on the functions \code{gap} of (Bioconductor) package \pkg{SAGx} by Per Broberg, \code{gapStat()} from former package \pkg{SLmisc} by Matthias Kohl and ideas from \code{gap()} and its methods of package \pkg{lga} by Justin Harrington. The current implementation is by Martin Maechler. } \seealso{ \code{\link{silhouette}} for a much simpler less sophisticated goodness of clustering measure. \code{\link[fpc]{cluster.stats}()} in package \pkg{fpc} for alternative measures. %\code{\link[SGAx]{gap}} in Bioconductor package \pkg{SGAx}. 
} \examples{ ### --- maxSE() methods ------------------------------------------- (mets <- eval(formals(maxSE)$method)) fk <- c(2,3,5,4,7,8,5,4) sk <- c(1,1,2,1,1,3,1,1)/2 ## use plot.clusGap(): plot(structure(class="clusGap", list(Tab = cbind(gap=fk, SE.sim=sk)))) ## Note that 'firstmax' and 'globalmax' are always at 3 and 6 : sapply(c(1/4, 1,2,4), function(SEf) sapply(mets, function(M) maxSE(fk, sk, method = M, SE.factor = SEf))) ### --- clusGap() ------------------------------------------------- ## ridiculously nicely separated clusters in 3 D : x <- rbind(matrix(rnorm(150, sd = 0.1), ncol = 3), matrix(rnorm(150, mean = 1, sd = 0.1), ncol = 3), matrix(rnorm(150, mean = 2, sd = 0.1), ncol = 3), matrix(rnorm(150, mean = 3, sd = 0.1), ncol = 3)) ## Slightly faster way to use pam (see below) pam1 <- function(x,k) list(cluster = pam(x,k, cluster.only=TRUE)) doExtras <- cluster:::doExtras() ## or set it explicitly to TRUE for the following if(doExtras) { ## Note we use B = 60 in the following examples to keep them "speedy". ## ---- rather keep the default B = 500 for your analysis! ## note we can pass 'nstart = 20' to kmeans() : gskmn <- clusGap(x, FUN = kmeans, nstart = 20, K.max = 8, B = 60) gskmn #-> its print() method plot(gskmn, main = "clusGap(., FUN = kmeans, n.start=20, B= 60)") set.seed(12); system.time( gsPam0 <- clusGap(x, FUN = pam, K.max = 8, B = 60) ) set.seed(12); system.time( gsPam1 <- clusGap(x, FUN = pam1, K.max = 8, B = 60) ) ## and show that it gives the same: stopifnot(identical(gsPam1[-4], gsPam0[-4])) gsPam1 print(gsPam1, method="globalSEmax") print(gsPam1, method="globalmax") }% end {doExtras} gs.pam.RU <- clusGap(ruspini, FUN = pam1, K.max = 8, B = 60) gs.pam.RU plot(gs.pam.RU, main = "Gap statistic for the 'ruspini' data") mtext("k = 4 is best .. and k = 5 pretty close") \donttest{## This takes a minute.. 
## No clustering ==> k = 1 ("one cluster") should be optimal: Z <- matrix(rnorm(256*3), 256,3) gsP.Z <- clusGap(Z, FUN = pam1, K.max = 8, B = 200) plot(gsP.Z) gsP.Z }%end{dont..} } \keyword{cluster} cluster/man/print.dissimilarity.Rd0000644000175100001440000000303210224260017017024 0ustar hornikusers\title{Print and Summary Methods for Dissimilarity Objects} \name{print.dissimilarity} \alias{print.dissimilarity} \alias{summary.dissimilarity} \alias{print.summary.dissimilarity} \description{ Print or summarize the distances and the attributes of a \code{dissimilarity} object. These are methods for the functions \code{print()} and \code{summary()} for \code{dissimilarity} objects. See \code{print}, \code{print.default}, or \code{summary} for the general behavior of these. } \usage{ \method{print}{dissimilarity}(x, diag = NULL, upper = NULL, digits = getOption("digits"), justify = "none", right = TRUE, \dots) \method{summary}{dissimilarity}(object, digits = max(3, getOption("digits") - 2), \dots) \method{print}{summary.dissimilarity}(x, \dots) } \arguments{ \item{x, object}{a \code{dissimilarity} object or a \code{summary.dissimilarity} one for \code{print.summary.dissimilarity()}.} \item{digits}{the number of digits to use, see \code{\link{print.default}}.} \item{diag, upper, justify, right}{optional arguments specifying how the triangular dissimilarity matrix is printed; see \code{\link[stats]{print.dist}}.} \item{\dots}{potential further arguments (require by generic).} } \seealso{ \code{\link{daisy}}, \code{\link{dissimilarity.object}}, \code{\link{print}}, \code{\link{print.default}}, \code{\link{print.dist}}. } \examples{ ## See example(daisy) sd <- summary(daisy(matrix(rnorm(100), 20,5))) sd # -> print.summary.dissimilarity(.) 
str(sd) } \keyword{cluster} \keyword{print} cluster/man/animals.Rd0000644000175100001440000000204211711740461014437 0ustar hornikusers\name{animals} \alias{animals} \title{Attributes of Animals} \usage{data(animals)} \description{ This data set considers 6 binary attributes for 20 animals. } \format{ A data frame with 20 observations on 6 variables: \tabular{rll}{ [ , 1] \tab war \tab warm-blooded \cr [ , 2] \tab fly \tab can fly \cr [ , 3] \tab ver \tab vertebrate \cr [ , 4] \tab end \tab endangered \cr [ , 5] \tab gro \tab live in groups \cr [ , 6] \tab hai \tab have hair \cr } All variables are encoded as 1 = `no', 2 = `yes'. } \source{ Leonard Kaufman and Peter J. Rousseeuw (1990): \emph{Finding Groups in Data} (pp 297ff). New York: Wiley. } \details{ This dataset is useful for illustrating monothetic (only a single variable is used for each split) hierarchical clustering. } \references{ see Struyf, Hubert & Rousseeuw (1996), in \code{\link{agnes}}. } \examples{ data(animals) apply(animals,2, table) # simple overview ma <- mona(animals) ma ## Plot similar to Figure 10 in Struyf et al (1996) plot(ma) } \keyword{datasets} cluster/man/predict.ellipsoid.Rd0000644000175100001440000000416411626750550016444 0ustar hornikusers\name{predict.ellipsoid} \alias{predict.ellipsoid} \alias{ellipsoidPoints} \title{Predict Method for Ellipsoid Objects} \description{ Compute points on the ellipsoid boundary, mostly for drawing. 
} \usage{ % method *and* stand alone on purpose : predict.ellipsoid(object, n.out=201, \dots) \method{predict}{ellipsoid}(object, n.out=201, \dots) ellipsoidPoints(A, d2, loc, n.half = 201) } \arguments{ \item{object}{an object of class \code{ellipsoid}, typically from \code{\link{ellipsoidhull}()}; alternatively any list-like object with proper components, see details below.} \item{n.out, n.half}{half the number of points to create.} \item{A, d2, loc}{arguments of the auxilary \code{ellipsoidPoints}, see below.} \item{\dots}{passed to and from methods.} } \details{ Note \code{ellipsoidPoints} is the workhorse function of \code{predict.ellipsoid} a standalone function and method for \code{ellipsoid} objects, see \code{\link{ellipsoidhull}}. The class of \code{object} is not checked; it must solely have valid components \code{loc} (length \eqn{p}), the \eqn{p \times p}{p x p} matrix \code{cov} (corresponding to \code{A}) and \code{d2} for the center, the shape (``covariance'') matrix and the squared average radius (or distance) or \code{\link{qchisq}(*, p)} quantile. Unfortunately, this is only implemented for \eqn{p = 2}, currently; contributions for \eqn{p \ge 3}{p >= 3} are \emph{very welcome}. } \value{ a numeric matrix of dimension \code{2*n.out} times \eqn{p}. } \seealso{\code{\link{ellipsoidhull}}, \code{\link{volume.ellipsoid}}. } \examples{ ## see also example(ellipsoidhull) ## Robust vs. L.S. 
covariance matrix set.seed(143) x <- rt(200, df=3) y <- 3*x + rt(200, df=2) plot(x,y, main="non-normal data (N=200)") mtext("with classical and robust cov.matrix ellipsoids") X <- cbind(x,y) C.ls <- cov(X) ; m.ls <- colMeans(X) d2.99 <- qchisq(0.99, df = 2) lines(ellipsoidPoints(C.ls, d2.99, loc=m.ls), col="green") if(require(MASS)) { Cxy <- cov.rob(cbind(x,y)) lines(ellipsoidPoints(Cxy$cov, d2 = d2.99, loc=Cxy$center), col="red") }# MASS } \keyword{dplot} \keyword{utilities} cluster/man/pam.object.Rd0000644000175100001440000000663412461440602015045 0ustar hornikusers\name{pam.object} \alias{pam.object} \title{Partitioning Around Medoids (PAM) Object} \description{ The objects of class \code{"pam"} represent a partitioning of a dataset into clusters. } \section{GENERATION}{ These objects are returned from \code{\link{pam}}.} \section{METHODS}{ The \code{"pam"} class has methods for the following generic functions: \code{print}, \code{summary}. } \section{INHERITANCE}{ The class \code{"pam"} inherits from \code{"partition"}. Therefore, the generic functions \code{plot} and \code{clusplot} can be used on a \code{pam} object. } \value{ A legitimate \code{pam} object is a \code{\link{list}} with the following components: \item{medoids}{ the medoids or representative objects of the clusters. 
If a dissimilarity matrix was given as input to \code{pam}, then a vector of numbers or labels of observations is given, else \code{medoids} is a \code{\link{matrix}} with in each row the coordinates of one medoid.} \item{id.med}{integer vector of \emph{indices} giving the medoid observation numbers.} \item{clustering}{the clustering vector, see \code{\link{partition.object}}.} \item{objective}{the objective function after the first and second step of the \code{pam} algorithm.} \item{isolation}{ vector with length equal to the number of clusters, specifying which clusters are isolated clusters (L- or L*-clusters) and which clusters are not isolated.\cr A cluster is an L*-cluster iff its diameter is smaller than its separation. A cluster is an L-cluster iff for each observation i the maximal dissimilarity between i and any other observation of the cluster is smaller than the minimal dissimilarity between i and any observation of another cluster. Clearly each L*-cluster is also an L-cluster. } \item{clusinfo}{ matrix, each row gives numerical information for one cluster. These are the cardinality of the cluster (number of observations), the maximal and average dissimilarity between the observations in the cluster and the cluster's medoid, %% FIXME: Now differs from clara.object.Rd: the diameter of the cluster (maximal dissimilarity between two observations of the cluster), and the separation of the cluster (minimal dissimilarity between an observation of the cluster and an observation of another cluster). } \item{silinfo}{list with silhouette width information, see \code{\link{partition.object}}.} \item{diss}{dissimilarity (maybe NULL), see \code{\link{partition.object}}.} \item{call}{generating call, see \code{\link{partition.object}}.} \item{data}{(possibibly standardized) see \code{\link{partition.object}}.} } \seealso{ \code{\link{pam}}, \code{\link{dissimilarity.object}}, \code{\link{partition.object}}, \code{\link{plot.partition}}. 
} \examples{ ## Use the silhouette widths for assessing the best number of clusters, ## following a one-dimensional example from Christian Hennig : ## x <- c(rnorm(50), rnorm(50,mean=5), rnorm(30,mean=15)) asw <- numeric(20) ## Note that "k=1" won't work! for (k in 2:20) asw[k] <- pam(x, k) $ silinfo $ avg.width k.best <- which.max(asw) cat("silhouette-optimal number of clusters:", k.best, "\n") plot(1:20, asw, type= "h", main = "pam() clustering assessment", xlab= "k (# clusters)", ylab = "average silhouette width") axis(1, k.best, paste("best",k.best,sep="\n"), col = "red", col.axis = "red") } \keyword{cluster} cluster/man/flower.Rd0000644000175100001440000000326711711740461014323 0ustar hornikusers\name{flower} \alias{flower} \title{Flower Characteristics} \usage{data(flower)} \description{8 characteristics for 18 popular flowers.} \format{ A data frame with 18 observations on 8 variables: \tabular{rll}{ [ , "V1"] \tab factor \tab winters \cr [ , "V2"] \tab factor \tab shadow \cr [ , "V3"] \tab factor \tab tubers \cr [ , "V4"] \tab factor \tab color \cr [ , "V5"] \tab ordered \tab soil \cr [ , "V6"] \tab ordered \tab preference \cr [ , "V7"] \tab numeric \tab height \cr [ , "V8"] \tab numeric \tab distance } \describe{ \item{V1}{winters, is binary and indicates whether the plant may be left in the garden when it freezes.} \item{V2}{shadow, is binary and shows whether the plant needs to stand in the shadow.} \item{V3}{tubers, is asymmetric binary and distinguishes between plants with tubers and plants that grow in any other way.} \item{V4}{color, is nominal and specifies the flower's color (1 = white, 2 = yellow, 3 = pink, 4 = red, 5 = blue).} \item{V5}{soil, is ordinal and indicates whether the plant grows in dry (1), normal (2), or wet (3) soil.} \item{V6}{preference, is ordinal and gives someone's preference ranking going from 1 to 18.} \item{V7}{height, is interval scaled, the plant's height in centimeters.} \item{V8}{distance, is interval scaled, the 
distance in centimeters that should be left between the plants.} } } \references{ Struyf, Hubert and Rousseeuw (1996), see \code{\link{agnes}}. } \examples{ data(flower) ## Example 2 in ref daisy(flower, type = list(asymm = 3)) daisy(flower, type = list(asymm = c(1, 3), ordratio = 7)) } \keyword{datasets} cluster/man/summary.agnes.Rd0000644000175100001440000000122010370161217015575 0ustar hornikusers\name{summary.agnes} \alias{summary.agnes} \alias{print.summary.agnes} \title{Summary Method for `agnes' Objects} \description{ Returns (and prints) a summary list for an \code{agnes} object. Printing gives more output than the corresponding \code{\link{print.agnes}} method. } \usage{ \method{summary}{agnes}(object, \dots) \method{print}{summary.agnes}(x, \dots) } \arguments{ \item{x, object}{a \code{\link{agnes}} object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{\code{\link{agnes}}, \code{\link{agnes.object}}.} \examples{ data(agriculture) summary(agnes(agriculture)) } \keyword{cluster} \keyword{print} cluster/man/fanny.object.Rd0000644000175100001440000000533310500041124015362 0ustar hornikusers\name{fanny.object} \alias{fanny.object} \title{Fuzzy Analysis (FANNY) Object} \description{ The objects of class \code{"fanny"} represent a fuzzy clustering of a dataset. } \section{GENERATION}{ These objects are returned from \code{\link{fanny}}. } \section{METHODS}{ The \code{"fanny"} class has methods for the following generic functions: \code{print}, \code{summary}. } \section{INHERITANCE}{ The class \code{"fanny"} inherits from \code{"partition"}. Therefore, the generic functions \code{plot} and \code{clusplot} can be used on a \code{fanny} object. } \value{ A legitimate \code{fanny} object is a list with the following components: \item{membership}{ matrix containing the memberships for each pair consisting of an observation and a cluster. 
} \item{memb.exp}{the membership exponent used in the fitting criterion.} \item{coeff}{ Dunn's partition coefficient \eqn{F(k)} of the clustering, where \eqn{k} is the number of clusters. \eqn{F(k)} is the sum of all \emph{squared} membership coefficients, divided by the number of observations. Its value is between \eqn{1/k} and 1. The normalized form of the coefficient is also given. It is defined as \eqn{(F(k) - 1/k) / (1 - 1/k)}, and ranges between 0 and 1. A low value of Dunn's coefficient indicates a very fuzzy clustering, whereas a value close to 1 indicates a near-crisp clustering. } \item{clustering}{ the clustering vector of the nearest crisp clustering, see \code{\link{partition.object}}.} \item{k.crisp}{integer (\eqn{\le k}{<= k}) giving the number of \emph{crisp} clusters; can be less than \eqn{k}, where it's recommended to decrease \code{memb.exp}.} \item{objective}{ named vector containing the minimal value of the objective function reached by the FANNY algorithm and the relative convergence tolerance \code{tol} used.% + still has 'iterations' for back-compatibility } \item{convergence}{ named vector with \code{iterations}, the number of iterations needed and \code{converged} indicating if the algorithm converged (in \code{maxit} iterations within convergence tolerance \code{tol}). } \item{diss}{ an object of class \code{"dissimilarity"}, see \code{\link{partition.object}}.} \item{call}{generating call, see \code{\link{partition.object}}.} \item{silinfo}{ list with silhouette information of the nearest crisp clustering, see \code{\link{partition.object}}.} \item{data}{matrix, possibibly standardized, or NULL, see \code{\link{partition.object}}.} } \seealso{ \code{\link{fanny}}, \code{\link{print.fanny}}, \code{\link{dissimilarity.object}}, \code{\link{partition.object}}, \code{\link{plot.partition}}. 
} \keyword{cluster} cluster/man/print.agnes.Rd0000644000175100001440000000134607520566217015261 0ustar hornikusers\name{print.agnes} \alias{print.agnes} \title{Print Method for AGNES Objects} \description{ Prints the call, agglomerative coefficient, ordering of objects and distances between merging clusters (`Height') of an \code{agnes} object. This is a method for the generic \code{\link{print}()} function for objects inheriting from class \code{agnes}, see \code{\link{agnes.object}}. } \usage{ \method{print}{agnes}(x, \dots) } \arguments{ \item{x}{an agnes object.} \item{\dots}{potential further arguments (required by generic).} } \seealso{ \code{\link{summary.agnes}} producing more output; \code{\link{agnes}}, \code{\link{agnes.object}}, \code{\link{print}}, \code{\link{print.default}}. } \keyword{cluster} \keyword{print} cluster/man/summary.diana.Rd0000644000175100001440000000077010370161217015565 0ustar hornikusers\name{summary.diana} \alias{summary.diana} \alias{print.summary.diana} \title{Summary Method for `diana' Objects} \description{Returns (and prints) a summary list for a \code{diana} object.} \usage{ \method{summary}{diana}(object, \dots) \method{print}{summary.diana}(x, \dots) } \arguments{ \item{x, object}{a \code{\link{diana}} object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{\code{\link{diana}}, \code{\link{diana.object}}.} \keyword{cluster} \keyword{print} cluster/man/ellipsoidhull.Rd0000644000175100001440000001152412456276724015706 0ustar hornikusers\name{ellipsoidhull} \alias{ellipsoidhull} \alias{print.ellipsoid} \title{Compute the Ellipsoid Hull or Spanning Ellipsoid of a Point Set} \description{ Compute the \dQuote{ellipsoid hull} or \dQuote{spanning ellipsoid}, i.e. the ellipsoid of minimal volume (\sQuote{area} in 2D) such that all given points lie just inside or on the boundary of the ellipsoid. 
} \usage{ ellipsoidhull(x, tol=0.01, maxit=5000, ret.wt = FALSE, ret.sqdist = FALSE, ret.pr = FALSE) \method{print}{ellipsoid}(x, digits = max(1, getOption("digits") - 2), \dots) } \arguments{ \item{x}{the \eqn{n} \eqn{p}-dimensional points asnumeric \eqn{n\times p}{n x p} matrix.} \item{tol}{convergence tolerance for Titterington's algorithm. Setting this to much smaller values may drastically increase the number of iterations needed, and you may want to increas \code{maxit} as well.} \item{maxit}{integer giving the maximal number of iteration steps for the algorithm.} \item{ret.wt, ret.sqdist, ret.pr}{logicals indicating if additional information should be returned, \code{ret.wt} specifying the \emph{weights}, \code{ret.sqdist} the \emph{\bold{sq}uared \bold{dist}ances} and \code{ret.pr} the final \bold{pr}obabilities in the algorithms.} \item{digits,\dots}{the usual arguments to \code{\link{print}} methods.} } \details{ The \dQuote{spanning ellipsoid} algorithm is said to stem from Titterington(1976), in Pison et al (1999) who use it for \code{\link{clusplot.default}}.\cr The problem can be seen as a special case of the \dQuote{Min.Vol.} ellipsoid of which a more more flexible and general implementation is \code{\link[MASS]{cov.mve}} in the \code{MASS} package. } \value{ an object of class \code{"ellipsoid"}, basically a \code{\link{list}} with several components, comprising at least \item{cov}{\eqn{p\times p}{p x p} \emph{covariance} matrix description the ellipsoid.} \item{loc}{\eqn{p}-dimensional location of the ellipsoid center.} \item{d2}{average squared radius. 
Further, \eqn{d2 = t^2}, where \eqn{t} is \dQuote{the value of a t-statistic on the ellipse boundary} (from \code{\link[ellipse]{ellipse}} in the \pkg{ellipse} package), and hence, more usefully, \code{d2 = qchisq(alpha, df = p)}, where \code{alpha} is the confidence level for p-variate normally distributed data with location and covariance \code{loc} and \code{cov} to lie inside the ellipsoid.} \item{wt}{the vector of weights iff \code{ret.wt} was true.} \item{sqdist}{the vector of squared distances iff \code{ret.sqdist} was true.} \item{prob}{the vector of algorithm probabilities iff \code{ret.pr} was true.} \item{it}{number of iterations used.} \item{tol, maxit}{just the input argument, see above.} \item{eps}{the achieved tolerance which is the maximal squared radius minus \eqn{p}.} \item{ierr}{error code as from the algorithm; \code{0} means \emph{ok}.} \item{conv}{logical indicating if the converged. This is defined as \code{it < maxit && ierr == 0}.} } \references{ Pison, G., Struyf, A. and Rousseeuw, P.J. (1999) Displaying a Clustering with CLUSPLOT, \emph{Computational Statistics and Data Analysis}, \bold{30}, 381--392.\cr %% Jan.2015 : no longer there: %% A version of this is available as technical report from %% \url{http://www.agoras.ua.ac.be/abstract/Disclu99.htm} D.M. Titterington (1976) Algorithms for computing D-optimal design on finite design spaces. In \emph{Proc.\ of the 1976 Conf.\ on Information Science and Systems}, 213--216; John Hopkins University. } \author{Martin Maechler did the present class implementation; Rousseeuw et al did the underlying code.} \seealso{\code{\link{predict.ellipsoid}} which is also the \code{\link{predict}} method for \code{ellipsoid} objects. \code{\link{volume.ellipsoid}} for an example of \sQuote{manual} \code{ellipsoid} object construction;\cr further \code{\link[ellipse]{ellipse}} from package \pkg{ellipse} and \code{\link[sfsmisc]{ellipsePoints}} from package \pkg{sfsmisc}. 
\code{\link[grDevices]{chull}} for the convex hull, \code{\link{clusplot}} which makes use of this; \code{\link[MASS]{cov.mve}}. } \examples{ x <- rnorm(100) xy <- unname(cbind(x, rnorm(100) + 2*x + 10)) exy <- ellipsoidhull(xy) exy # >> calling print.ellipsoid() plot(xy, main = "ellipsoidhull() -- 'spanning points'") lines(predict(exy), col="blue") points(rbind(exy$loc), col = "red", cex = 3, pch = 13) exy <- ellipsoidhull(xy, tol = 1e-7, ret.wt = TRUE, ret.sq = TRUE) str(exy) # had small `tol', hence many iterations (ii <- which(zapsmall(exy $ wt) > 1e-6)) ## --> only about 4 to 6 "spanning ellipsoid" points round(exy$wt[ii],3); sum(exy$wt[ii]) # weights summing to 1 points(xy[ii,], pch = 21, cex = 2, col="blue", bg = adjustcolor("blue",0.25)) } \keyword{dplot} \keyword{hplot}% << ? chull has "hplot" as well. cluster/man/print.diana.Rd0000644000175100001440000000116507351402102015217 0ustar hornikusers\name{print.diana} \alias{print.diana} \title{Print Method for DIANA Objects} \description{ Prints the ordering of objects, diameters of splitted clusters, and divisive coefficient of a \code{diana} object. This is a method for the function \code{\link{print}()} for objects inheriting from class \code{\link{diana}}. } \usage{ \method{print}{diana}(x, \dots) } \arguments{ \item{x}{a diana object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{ \code{\link{diana}}, \code{\link{diana.object}}, \code{\link{print}}, \code{\link{print.default}}. } \keyword{cluster} \keyword{print} cluster/man/votes.repub.Rd0000644000175100001440000000113407351402102015260 0ustar hornikusers\name{votes.repub} \alias{votes.repub} \title{Votes for Republican Candidate in Presidential Elections} \usage{data(votes.repub)} \description{ A data frame with the percents of votes given to the republican candidate in presidential elections from 1856 to 1976. Rows represent the 50 states, and columns the 31 elections. } \source{ S. 
Peterson (1973): \emph{A Statistical History of the American Presidential Elections}. New York: Frederick Ungar Publishing Co. Data from 1964 to 1976 is from R. M. Scammon, \emph{American Votes 12}, Congressional Quarterly. } \keyword{datasets} cluster/man/dissimilarity.object.Rd0000644000175100001440000000463210370161217017151 0ustar hornikusers\name{dissimilarity.object} \alias{dissimilarity.object} \title{Dissimilarity Matrix Object} \description{ Objects of class \code{"dissimilarity"} representing the dissimilarity matrix of a dataset. } \section{GENERATION}{ \code{\link{daisy}} returns this class of objects. Also the functions \code{pam}, \code{clara}, \code{fanny}, \code{agnes}, and \code{diana} return a \code{dissimilarity} object, as one component of their return objects. } \section{METHODS}{ The \code{"dissimilarity"} class has methods for the following generic functions: \code{print}, \code{summary}. } \value{ The dissimilarity matrix is symmetric, and hence its lower triangle (column wise) is represented as a vector to save storage space. If the object, is called \code{do}, and \code{n} the number of observations, i.e., \code{n <- attr(do, "Size")}, then for \eqn{i < j <= n}, the dissimilarity between (row) i and j is \code{do[n*(i-1) - i*(i-1)/2 + j-i]}. The length of the vector is \eqn{n*(n-1)/2}, i.e., of order \eqn{n^2}. \code{"dissimilarity"} objects also inherit from class \code{\link{dist}} and can use \code{dist} methods, in particular, \code{\link{as.matrix}}, such that \eqn{d_{ij}}{d(i,j)} from above is just \code{as.matrix(do)[i,j]}. The object has the following attributes: \item{Size}{the number of observations in the dataset.} \item{Metric}{the metric used for calculating the dissimilarities. 
Possible values are "euclidean", "manhattan", "mixed" (if variables of different types were present in the dataset), and "unspecified".} \item{Labels}{optionally, contains the labels, if any, of the observations of the dataset.} \item{NA.message}{optionally, if a dissimilarity could not be computed, because of too many missing values for some observations of the dataset.} \item{Types}{when a mixed metric was used, the types for each variable as one-letter codes (as in the book, e.g. p.54): \describe{ \item{A}{Asymmetric binary} \item{S}{Symmetric binary} \item{N}{Nominal (factor)} \item{O}{Ordinal (ordered factor)} \item{I}{Interval scaled (numeric)} \item{T}{raTio to be log transformed (positive numeric)} }.} } \seealso{ \code{\link{daisy}}, \code{\link{dist}}, \code{\link{pam}}, \code{\link{clara}}, \code{\link{fanny}}, \code{\link{agnes}}, \code{\link{diana}}. } %\examples{} --> ./daisy.Rd \keyword{cluster} cluster/man/summary.clara.Rd0000644000175100001440000000241010370161217015564 0ustar hornikusers\name{summary.clara} \alias{summary.clara} \alias{print.summary.clara} \title{Summary Method for `clara' Objects} \description{ Returns (and prints) a summary list for a \code{clara} object. Printing gives more output than the corresponding \code{\link{print.clara}} method. } \usage{ \method{summary}{clara}(object, \dots) \method{print}{summary.clara}(x, \dots) } \arguments{ \item{x, object}{a \code{\link{clara}} object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{\code{\link{clara.object}}} \examples{ ## generate 2000 objects, divided into 5 clusters. 
set.seed(47) x <- rbind(cbind(rnorm(400, 0,4), rnorm(400, 0,4)), cbind(rnorm(400,10,8), rnorm(400,40,6)), cbind(rnorm(400,30,4), rnorm(400, 0,4)), cbind(rnorm(400,40,4), rnorm(400,20,2)), cbind(rnorm(400,50,4), rnorm(400,50,4)) ) clx5 <- clara(x, 5) ## Mis`classification' table: % R version >= 1.5 : % table(rep(1:5, each = 400), clx5$clust) # -> 1 "error" table(rep(1:5, rep(400,5)), clx5$clust) # -> 1 "error" summary(clx5) ## Graphically: par(mfrow = c(3,1), mgp = c(1.5, 0.6, 0), mar = par("mar") - c(0,0,2,0)) %>1.5: plot(x, col = rep(2:6, each = 400)) plot(x, col = rep(2:6, rep(400,5))) plot(clx5) } \keyword{cluster} \keyword{print} cluster/man/summary.pam.Rd0000644000175100001440000000104710370161217015264 0ustar hornikusers\name{summary.pam} \alias{summary.pam} \alias{print.summary.pam} \title{Summary Method for PAM Objects} \description{Summarize a \code{\link{pam}} object and return an object of class \code{summary.pam}. There's a \code{\link{print}} method for the latter. } \usage{ \method{summary}{pam}(object, \dots) \method{print}{summary.pam}(x, \dots) } \arguments{ \item{x, object}{a \code{\link{pam}} object.} \item{\dots}{potential further arguments (require by generic).} } \seealso{\code{\link{pam}}, \code{\link{pam.object}}. } \keyword{cluster} cluster/man/clara.Rd0000644000175100001440000001740211573376173014116 0ustar hornikusers\name{clara} \alias{clara} \title{Clustering Large Applications} \description{ Computes a \code{"clara"} object, a list representing a clustering of the data into \code{k} clusters. } \usage{ clara(x, k, metric = "euclidean", stand = FALSE, samples = 5, sampsize = min(n, 40 + 2 * k), trace = 0, medoids.x = TRUE, keep.data = medoids.x, rngR = FALSE, pamLike = FALSE) } \arguments{ \item{x}{ data matrix or data frame, each row corresponds to an observation, and each column corresponds to a variable. All variables must be numeric. Missing values (NAs) are allowed.} \item{k}{integer, the number of clusters. 
It is required that \eqn{0 < k < n} where \eqn{n} is the number of observations (i.e., n = \code{nrow(x)}).} \item{metric}{ character string specifying the metric to be used for calculating dissimilarities between observations. The currently available options are "euclidean" and "manhattan". Euclidean distances are root sum-of-squares of differences, and manhattan distances are the sum of absolute differences. } \item{stand}{logical, indicating if the measurements in \code{x} are standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation. } \item{samples}{integer, number of samples to be drawn from the dataset. The default, \code{5}, is rather small for historical (and now back compatibility) reasons and we recommend to set \code{samples} an order of magnitude larger. } \item{sampsize}{integer, number of observations in each sample. \code{sampsize} should be higher than the number of clusters (\code{k}) and at most the number of observations (n = \code{nrow(x)}).} \item{trace}{integer indicating a \emph{trace level} for diagnostic output during the algorithm.} \item{medoids.x}{logical indicating if the medoids should be returned, identically to some rows of the input data \code{x}. If \code{FALSE}, \code{keep.data} must be false as well, and the medoid indices, i.e., row numbers of the medoids will still be returned (\code{i.med} component), and the algorithm saves space by needing one copy less of \code{x}.} \item{keep.data}{logical indicating if the (\emph{scaled} if \code{stand} is true) data should be kept in the result. % (\code{keepdata} is equivalent to \code{keep.data} where the former % is deprecated.) Setting this to \code{FALSE} saves memory (and hence time), but disables \code{\link{clusplot}()}ing of the result. 
Use \code{medoids.x = FALSE} to save even more memory.} \item{rngR}{logical indicating if \R's random number generator should be used instead of the primitive clara()-builtin one. If true, this also means that each call to \code{clara()} returns a different result -- though only slightly different in good situations.} \item{pamLike}{logical indicating if the \dQuote{swap} phase (see \code{\link{pam}}, in C code) should use the same algorithm as \code{\link{pam}()}. Note that from Kaufman and Rousseeuw's description this \emph{should} have been true always, but as the original Fortran code and the subsequent port to C has always contained a small one-letter change (a typo according to Martin Maechler) with respect to PAM, the default, \code{pamLike = FALSE} has been chosen to remain back compatible rather than \dQuote{PAM compatible}.} } \value{ an object of class \code{"clara"} representing the clustering. See \code{\link{clara.object}} for details. } \details{ \code{clara} is fully described in chapter 3 of Kaufman and Rousseeuw (1990). Compared to other partitioning methods such as \code{pam}, it can deal with much larger datasets. Internally, this is achieved by considering sub-datasets of fixed size (\code{sampsize}) such that the time and storage requirements become linear in \eqn{n} rather than quadratic. Each sub-dataset is partitioned into \code{k} clusters using the same algorithm as in \code{\link{pam}}.\cr Once \code{k} representative objects have been selected from the sub-dataset, each observation of the entire dataset is assigned to the nearest medoid. The mean (equivalent to the sum) of the dissimilarities of the observations to their closest medoid is used as a measure of the quality of the clustering. The sub-dataset for which the mean (or sum) is minimal, is retained. A further analysis is carried out on the final partition. Each sub-dataset is forced to contain the medoids obtained from the best sub-dataset until then. 
Randomly drawn observations are added to this set until \code{sampsize} has been reached. } \note{ %% mostly by Martin Maechler : By default, the random sampling is implemented with a \emph{very} simple scheme (with period \eqn{2^{16} = 65536}) inside the Fortran code, independently of \R's random number generation, and as a matter of fact, deterministically. Alternatively, we recommend setting \code{rngR = TRUE} which uses \R's random number generators. Then, \code{clara()} results are made reproducible typically by using \code{\link{set.seed}()} before calling \code{clara}. The storage requirement of \code{clara} computation (for small \code{k}) is about \eqn{O(n \times p) + O(j^2)}{O(n * p) + O(j^2)} where \eqn{j = \code{sampsize}}, and \eqn{(n,p) = \code{dim(x)}}. The CPU computing time (again assuming small \code{k}) is about \eqn{O(n \times p \times j^2 \times N)}{O(n * p * j^2 * N)}, where \eqn{N = \code{samples}}. For \dQuote{small} datasets, the function \code{\link{pam}} can be used directly. What can be considered \emph{small}, is really a function of available computing power, both memory (RAM) and speed. Originally (1990), \dQuote{small} meant less than 100 observations; in 1997, the authors said \emph{\dQuote{small (say with fewer than 200 observations)}}; as of 2006, you can use \code{\link{pam}} with several thousand observations. } \author{ Kaufman and Rousseeuw (see \code{\link{agnes}}), originally. All arguments from \code{trace} on, and most \R documentation and all tests by Martin Maechler. } \seealso{ \code{\link{agnes}} for background and references; \code{\link{clara.object}}, \code{\link{pam}}, \code{\link{partition.object}}, \code{\link{plot.partition}}. } \examples{ ## generate 500 objects, divided into 2 clusters. 
x <- rbind(cbind(rnorm(200,0,8), rnorm(200,0,8)), cbind(rnorm(300,50,8), rnorm(300,50,8))) clarax <- clara(x, 2, samples=50) clarax clarax$clusinfo ## using pamLike=TRUE gives the same (apart from the 'call'): all.equal(clarax[-8], clara(x, 2, samples=50, pamLike = TRUE)[-8]) plot(clarax) ## `xclara' is an artificial data set with 3 clusters of 1000 bivariate ## objects each. data(xclara) (clx3 <- clara(xclara, 3)) ## "better" number of samples cl.3 <- clara(xclara, 3, samples=100) ## but that did not change the result here: stopifnot(cl.3$clustering == clx3$clustering) ## Plot similar to Figure 5 in Struyf et al (1996) \dontrun{plot(clx3, ask = TRUE)} \dontshow{plot(clx3)} ## Try 100 times *different* random samples -- for reliability: nSim <- 100 nCl <- 3 # = no.classes set.seed(421)# (reproducibility) cl <- matrix(NA,nrow(xclara), nSim) for(i in 1:nSim) cl[,i] <- clara(xclara, nCl, medoids.x = FALSE, rngR = TRUE)$cluster tcl <- apply(cl,1, tabulate, nbins = nCl) ## those that are not always in same cluster (5 out of 3000 for this seed): (iDoubt <- which(apply(tcl,2, function(n) all(n < nSim)))) if(length(iDoubt)) { # (not for all seeds) tabD <- tcl[,iDoubt, drop=FALSE] dimnames(tabD) <- list(cluster = paste(1:nCl), obs = format(iDoubt)) t(tabD) # how many times in which clusters } } \keyword{cluster} cluster/man/xclara.Rd0000644000175100001440000000120711711740461014267 0ustar hornikusers\name{xclara} \alias{xclara} \title{Bivariate Data Set with 3 Clusters} \description{ An artificial data set consisting of 3000 points in 3 well-separated clusters of size 1000 each. } \usage{data(xclara)} \format{ A data frame with 3000 observations on 2 numeric variables giving the \eqn{x} and \eqn{y} coordinates of the points, respectively. } \source{ Sample data set accompanying the reference below. } \references{ Anja Struyf, Mia Hubert & Peter J. Rousseeuw (1996) Clustering in an Object-Oriented Environment. \emph{Journal of Statistical Software} \bold{1}. 
\url{http://www.jstatsoft.org/v01/i04} } \keyword{datasets} cluster/man/coef.hclust.Rd0000644000175100001440000000442511674345261015246 0ustar hornikusers\name{coef.hclust} \alias{coefHier} \alias{coef.hclust} \alias{coef.twins} \title{Agglomerative / Divisive Coefficient for 'hclust' Objects} \description{ Computes the \dQuote{agglomerative coefficient} (aka \dQuote{divisive coefficient} for \code{\link{diana}}), measuring the clustering structure of the dataset. For each observation i, denote by \eqn{m(i)} its dissimilarity to the first cluster it is merged with, divided by the dissimilarity of the merger in the final step of the algorithm. The agglomerative coefficient is the average of all \eqn{1 - m(i)}. It can also be seen as the average width (or the percentage filled) of the banner plot. \code{coefHier()} directly interfaces to the underlying C code, and \dQuote{proves} that \emph{only} \code{object$heights} is needed to compute the coefficient. Because it grows with the number of observations, this measure should not be used to compare datasets of very different sizes. } \usage{ coefHier(object) coef.hclust(object, \dots)%-- we export this, on purpose \method{coef}{hclust}(object, \dots) \method{coef}{twins}(object, \dots) } \arguments{ \item{object}{an object of class \code{"hclust"} or \code{"twins"}, i.e., typically the result of \code{\link{hclust}(.)},\code{\link{agnes}(.)}, or \code{\link{diana}(.)}. Since \code{coef.hclust} only uses \code{object$heights}, and \code{object$merge}, \code{object} can be any list-like object with appropriate \code{merge} and \code{heights} components. For \code{coefHier}, even only \code{object$heights} is needed. } \item{\dots}{currently unused potential further arguments} } \value{ a number specifying the \emph{agglomerative} (or \emph{divisive} for \code{diana} objects) coefficient as defined by Kaufman and Rousseeuw, see \code{\link{agnes.object} $ ac} or \code{\link{diana.object} $ dc}. 
} \examples{ data(agriculture) aa <- agnes(agriculture) coef(aa) # really just extracts aa$ac coef(as.hclust(aa))# recomputes coefHier(aa) # ditto \dontshow{ stopifnot(all.equal(coef(aa), coefHier(aa))) d.a <- dist(agriculture, "manhattan") for (m in c("average", "single", "complete")) stopifnot(all.equal(coef(hclust(d.a, method=m)), coef(agnes (d.a, method=m)), tol=1e-13)) } } \keyword{cluster} cluster/man/pltree.Rd0000644000175100001440000000460112421464232014307 0ustar hornikusers\name{pltree} \alias{pltree} \alias{pltree.twins} \title{Plot Clustering Tree of a Hierarchical Clustering} \description{ \code{pltree()} Draws a clustering tree (\dQuote{dendrogram}) on the current graphics device. We provide the \code{twins} method draws the tree of a \code{twins} object, i.e., hierarchical clustering, typically resulting from \code{\link{agnes}()} or \code{\link{diana}()}. } \usage{ pltree(x, \dots) \method{pltree}{twins}(x, main = paste("Dendrogram of ", deparse(x$call)), labels = NULL, ylab = "Height", \dots) } \arguments{ \item{x}{in general, an \R object for which a \code{pltree} method is defined; specifically, an object of class \code{"twins"}, typically created by either \code{\link{agnes}()} or \code{\link{diana}()}.} \item{main}{main title with a sensible default.} \item{labels}{labels to use; the default is constructed from \code{x}.} \item{ylab}{label for y-axis.} \item{\dots}{graphical parameters (see \code{\link{par}}) may also be supplied as arguments to this function.} } \value{ a NULL value is returned. } \details{ Creates a plot of a clustering tree given a \code{twins} object. The leaves of the tree are the original observations. In case of an agglomerative clustering, two branches come together at the distance between the two clusters being merged. For a divisive clustering, a branch splits up at the diameter of the cluster being splitted. 
Note that currently the method function simply calls \code{plot(\link[stats]{as.hclust}(x), ...)}, which dispatches to \code{\link{plot.hclust}(..)}. If more flexible plots are needed, consider \code{xx <- \link{as.dendrogram}(\link{as.hclust}(x))} and plotting \code{xx}, see \code{\link{plot.dendrogram}}. } \seealso{ \code{\link{agnes}}, \code{\link{agnes.object}}, \code{\link{diana}}, \code{\link{diana.object}}, \code{\link{hclust}}, \code{\link{par}}, \code{\link{plot.agnes}}, \code{\link{plot.diana}}. } \examples{ data(votes.repub) agn <- agnes(votes.repub) pltree(agn) dagn <- as.dendrogram(as.hclust(agn)) dagn2 <- as.dendrogram(as.hclust(agn), hang = 0.2) op <- par(mar = par("mar") + c(0,0,0, 2)) # more space to the right plot(dagn2, horiz = TRUE) plot(dagn, horiz = TRUE, center = TRUE, nodePar = list(lab.cex = 0.6, lab.col = "forest green", pch = NA), main = deparse(agn$call)) par(op) } \keyword{cluster} \keyword{hplot} cluster/man/clusplot.default.Rd0000644000175100001440000002736112527057716016330 0ustar hornikusers\name{clusplot.default} \alias{clusplot.default} \title{Bivariate Cluster Plot (clusplot) Default Method} \description{ Creates a bivariate plot visualizing a partition (clustering) of the data. All observation are represented by points in the plot, using principal components or multidimensional scaling. Around each cluster an ellipse is drawn. 
} \usage{ \method{clusplot}{default}(x, clus, diss = FALSE, s.x.2d = mkCheckX(x, diss), stand = FALSE, lines = 2, shade = FALSE, color = FALSE, labels= 0, plotchar = TRUE, col.p = "dark green", col.txt = col.p, col.clus = if(color) c(2, 4, 6, 3) else 5, cex = 1, cex.txt = cex, span = TRUE, add = FALSE, xlim = NULL, ylim = NULL, main = paste("CLUSPLOT(", deparse(substitute(x)),")"), sub = paste("These two components explain", round(100 * var.dec, digits = 2), "\% of the point variability."), xlab = "Component 1", ylab = "Component 2", verbose = getOption("verbose"), \dots) } \arguments{ \item{x}{matrix or data frame, or dissimilarity matrix, depending on the value of the \code{diss} argument. In case of a matrix (alike), each row corresponds to an observation, and each column corresponds to a variable. All variables must be numeric. Missing values (\code{\link{NA}}s) are allowed. They are replaced by the median of the corresponding variable. When some variables or some observations contain only missing values, the function stops with a warning message. In case of a dissimilarity matrix, \code{x} is the output of \code{\link{daisy}} or \code{\link{dist}} or a symmetric matrix. Also, a vector of length \eqn{n*(n-1)/2} is allowed (where \eqn{n} is the number of observations), and will be interpreted in the same way as the output of the above-mentioned functions. Missing values (NAs) are not allowed. } \item{clus}{ a vector of length n representing a clustering of \code{x}. For each observation the vector lists the number or name of the cluster to which it has been assigned. 
\code{clus} is often the clustering component of the output of \code{\link{pam}}, \code{\link{fanny}} or \code{\link{clara}}.} \item{diss}{ logical indicating if \code{x} will be considered as a dissimilarity matrix or a matrix of observations by variables (see \code{x} arugment above).} \item{s.x.2d}{a \code{\link{list}} with components named \code{x} (a \eqn{n \times 2} matrix; typically something like principal components of original data), \code{labs} and \code{var.dec}.}% FIXME: 'labs' and 'var.dec' are not always needed \item{stand}{ logical flag: if true, then the representations of the n observations in the 2-dimensional plot are standardized. } \item{lines}{ integer out of \code{0, 1, 2}, used to obtain an idea of the distances between ellipses. The distance between two ellipses E1 and E2 is measured along the line connecting the centers \eqn{m1} and \eqn{m2} of the two ellipses. In case E1 and E2 overlap on the line through \eqn{m1} and \eqn{m2}, no line is drawn. Otherwise, the result depends on the value of \code{lines}: If \describe{ \item{lines = 0,}{no distance lines will appear on the plot;} \item{lines = 1,}{the line segment between \eqn{m1} and \eqn{m2} is drawn;} \item{lines = 2,}{a line segment between the boundaries of E1 and E2 is drawn (along the line connecting \eqn{m1} and \eqn{m2}).} } } \item{shade}{ logical flag: if TRUE, then the ellipses are shaded in relation to their density. The density is the number of points in the cluster divided by the area of the ellipse. } \item{color}{ logical flag: if TRUE, then the ellipses are colored with respect to their density. With increasing density, the colors are light blue, light green, red and purple. To see these colors on the graphics device, an appropriate color scheme should be selected (we recommend a white background).} \item{labels}{ integer code, currently one of 0,1,2,3,4 and 5. 
If \describe{ \item{labels= 0,}{no labels are placed in the plot;} \item{labels= 1,}{points and ellipses can be identified in the plot (see \code{\link{identify}});} \item{labels= 2,}{all points and ellipses are labelled in the plot;} \item{labels= 3,}{only the points are labelled in the plot;} \item{labels= 4,}{only the ellipses are labelled in the plot.} \item{labels= 5,}{the ellipses are labelled in the plot, and points can be identified.} } The levels of the vector \code{clus} are taken as labels for the clusters. The labels of the points are the rownames of \code{x} if \code{x} is matrix like. Otherwise (\code{diss = TRUE}), \code{x} is a vector, point labels can be attached to \code{x} as a "Labels" attribute (\code{attr(x,"Labels")}), as is done for the output of \code{\link{daisy}}. A possible \code{\link{names}} attribute of \code{clus} will not be taken into account. } \item{plotchar}{ logical flag: if TRUE, then the plotting symbols differ for points belonging to different clusters. } \item{span}{ logical flag: if TRUE, then each cluster is represented by the ellipse with smallest area containing all its points. (This is a special case of the minimum volume ellipsoid.)\cr If FALSE, the ellipse is based on the mean and covariance matrix of the same points. While this is faster to compute, it often yields a much larger ellipse. There are also some special cases: When a cluster consists of only one point, a tiny circle is drawn around it. When the points of a cluster fall on a straight line, \code{span=FALSE} draws a narrow ellipse around it and \code{span=TRUE} gives the exact line segment. } \item{add}{logical indicating if ellipses (and labels if \code{labels} is true) should be \emph{added} to an already existing plot. 
If false, neither a \code{\link{title}} or sub title, see \code{sub}, is written.} \item{col.p}{color code(s) used for the observation points.} \item{col.txt}{color code(s) used for the labels (if \code{labels >= 2}).} \item{col.clus}{color code for the ellipses (and their labels); only one if color is false (as per default).} \item{cex, cex.txt}{character \bold{ex}pansion (size), for the point symbols and point labels, respectively.} \item{xlim, ylim}{numeric vectors of length 2, giving the x- and y- ranges as in \code{\link{plot.default}}.} \item{main}{main title for the plot; by default, one is constructed.} \item{sub}{sub title for the plot; by default, one is constructed.} \item{xlab, ylab}{x- and y- axis labels for the plot, with defaults.} \item{verbose}{a logical indicating, if there should be extra diagnostic output; mainly for \sQuote{debugging}.} \item{\dots}{Further graphical parameters may also be supplied, see \code{\link{par}}.} }% End Arguments \value{ An invisible list with components: \item{Distances}{ When \code{lines} is 1 or 2 we optain a k by k matrix (k is the number of clusters). The element in \code{[i,j]} is the distance between ellipse i and ellipse j.\cr If \code{lines = 0}, then the value of this component is \code{NA}. } \item{Shading}{ A vector of length k (where k is the number of clusters), containing the amount of shading per cluster. Let y be a vector where element i is the ratio between the number of points in cluster i and the area of ellipse i. When the cluster i is a line segment, y[i] and the density of the cluster are set to \code{NA}. Let z be the sum of all the elements of y without the NAs. Then we put shading = y/z *37 + 3 . } } \section{Side Effects}{ a visual display of the clustering is plotted on the current graphics device. } \details{ \code{clusplot} uses the functions \code{\link{princomp}} and \code{\link{cmdscale}}. These functions are data reduction techniques. They will represent the data in a bivariate plot. 
Ellipses are then drawn to indicate the clusters. The further layout of the plot is determined by the optional arguments. } \note{ When we have 4 or fewer clusters, then the \code{color=TRUE} gives every cluster a different color. When there are more than 4 clusters, clusplot uses the function \code{\link{pam}} to cluster the densities into 4 groups such that ellipses with nearly the same density get the same color. \code{col.clus} specifies the colors used. The \code{col.p} and \code{col.txt} arguments, added for \R, are recycled to have length the number of observations. If \code{col.p} has more than one value, using \code{color = TRUE} can be confusing because of a mix of point and ellipse colors. } \references{ Pison, G., Struyf, A. and Rousseeuw, P.J. (1999) Displaying a Clustering with CLUSPLOT, \emph{Computational Statistics and Data Analysis}, \bold{30}, 381--392.\cr %% Jan.2015 : no longer there: %% A version of this is available as technical report from %% \url{http://www.agoras.ua.ac.be/abstract/Disclu99.htm} Kaufman, L. and Rousseeuw, P.J. (1990). \emph{Finding Groups in Data: An Introduction to Cluster Analysis.} Wiley, New York. Struyf, A., Hubert, M. and Rousseeuw, P.J. (1997). Integrating Robust Clustering Techniques in S-PLUS, \emph{Computational Statistics and Data Analysis}, \bold{26}, 17-37. } \seealso{ \code{\link{princomp}}, \code{\link{cmdscale}}, \code{\link{pam}}, \code{\link{clara}}, \code{\link{daisy}}, \code{\link{par}}, \code{\link{identify}}, \code{\link[MASS]{cov.mve}}, \code{\link{clusplot.partition}}. 
} \examples{ ## plotting votes.diss(dissimilarity) in a bivariate plot and ## partitioning into 2 clusters data(votes.repub) votes.diss <- daisy(votes.repub) pamv <- pam(votes.diss, 2, diss = TRUE) clusplot(pamv, shade = TRUE) ## is the same as votes.clus <- pamv$clustering clusplot(votes.diss, votes.clus, diss = TRUE, shade = TRUE) ## Now look at components 3 and 2 instead of 1 and 2: str(cMDS <- cmdscale(votes.diss, k=3, add=TRUE)) clusplot(pamv, s.x.2d = list(x=cMDS$points[, c(3,2)], labs=rownames(votes.repub), var.dec=NA), shade = TRUE, col.p = votes.clus, sub="", xlab = "Component 3", ylab = "Component 2") clusplot(pamv, col.p = votes.clus, labels = 4)# color points and label ellipses # "simple" cheap ellipses: larger than minimum volume: # here they are *added* to the previous plot: clusplot(pamv, span = FALSE, add = TRUE, col.clus = "midnightblue") ## a work-around for setting a small label size: clusplot(votes.diss, votes.clus, diss = TRUE) op <- par(new=TRUE, cex = 0.6) clusplot(votes.diss, votes.clus, diss = TRUE, axes=FALSE,ann=FALSE, sub="", col.p=NA, col.txt="dark green", labels=3) par(op) ## MM: This should now be as simple as clusplot(votes.diss, votes.clus, diss = TRUE, labels = 3, cex.txt = 0.6) if(interactive()) { # uses identify() *interactively* : clusplot(votes.diss, votes.clus, diss = TRUE, shade = TRUE, labels = 1) clusplot(votes.diss, votes.clus, diss = TRUE, labels = 5)# ident. only points } ## plotting iris (data frame) in a 2-dimensional plot and partitioning ## into 3 clusters. 
data(iris) iris.x <- iris[, 1:4] cl3 <- pam(iris.x, 3)$clustering op <- par(mfrow= c(2,2)) clusplot(iris.x, cl3, color = TRUE) U <- par("usr") ## zoom in : rect(0,-1, 2,1, border = "orange", lwd=2) clusplot(iris.x, cl3, color = TRUE, xlim = c(0,2), ylim = c(-1,1)) box(col="orange",lwd=2); mtext("sub region", font = 4, cex = 2) ## or zoom out : clusplot(iris.x, cl3, color = TRUE, xlim = c(-4,4), ylim = c(-4,4)) mtext("`super' region", font = 4, cex = 2) rect(U[1],U[3], U[2],U[4], lwd=2, lty = 3) # reset graphics par(op) } \keyword{cluster} \keyword{hplot} cluster/man/print.fanny.Rd0000644000175100001440000000165010247645540015272 0ustar hornikusers\name{print.fanny} \alias{print.fanny} \alias{summary.fanny} \alias{print.summary.fanny} \title{Print and Summary Methods for FANNY Objects} \description{ Prints the objective function, membership coefficients and clustering vector of \code{fanny} object. This is a method for the function \code{\link{print}()} for objects inheriting from class \code{\link{fanny}}. } \usage{ \method{print}{fanny}(x, digits = getOption("digits"), \dots) \method{summary}{fanny}(object, \dots) \method{print}{summary.fanny}(x, digits = getOption("digits"), \dots) } \arguments{ \item{x, object}{a \code{\link{fanny}} object.} \item{digits}{number of significant digits for printing, see \code{\link{print.default}}.} \item{\dots}{potential further arguments (required by generic).} } \seealso{ \code{\link{fanny}}, \code{\link{fanny.object}}, \code{\link{print}}, \code{\link{print.default}}. } \keyword{cluster} \keyword{print} cluster/man/sizeDiss.Rd0000644000175100001440000000171010224226467014614 0ustar hornikusers\name{sizeDiss} \alias{sizeDiss} \title{Sample Size of Dissimilarity Like Object} \description{ Returns the number of observations (\emph{sample size}) corresponding to a dissimilarity like object, or equivalently, the number of rows or columns of a matrix when only the lower or upper triangular part (without diagonal) is given. 
It is nothing else but the inverse function of \eqn{f(n) = n(n-1)/2}. } \usage{ sizeDiss(d) } \arguments{ \item{d}{any \R object with length (typically) \eqn{n(n-1)/2}.} } \value{ a number; \eqn{n} if \code{length(d) == n(n-1)/2}, \code{NA} otherwise. } \seealso{\code{\link{dissimilarity.object}} and also \code{\link{as.dist}} for class \code{dissimilarity} and \code{dist} objects which have a \code{Size} attribute.} \examples{ sizeDiss(1:10)# 5, since 10 == 5 * (5 - 1) / 2 sizeDiss(1:9) # NA n <- 1:100 stopifnot(n == sapply( n*(n-1)/2, function(n) sizeDiss(logical(n)))) } \keyword{utilities} \keyword{arith} cluster/man/fanny.Rd0000644000175100001440000001521612236154412014133 0ustar hornikusers\name{fanny} \alias{fanny} \title{Fuzzy Analysis Clustering} \description{ Computes a fuzzy clustering of the data into \code{k} clusters. } \usage{ fanny(x, k, diss = inherits(x, "dist"), memb.exp = 2, metric = c("euclidean", "manhattan", "SqEuclidean"), stand = FALSE, iniMem.p = NULL, cluster.only = FALSE, keep.diss = !diss && !cluster.only && n < 100, keep.data = !diss && !cluster.only, maxit = 500, tol = 1e-15, trace.lev = 0) } \arguments{ \item{x}{ data matrix or data frame, or dissimilarity matrix, depending on the value of the \code{diss} argument. In case of a matrix or data frame, each row corresponds to an observation, and each column corresponds to a variable. All variables must be numeric. Missing values (NAs) are allowed. In case of a dissimilarity matrix, \code{x} is typically the output of \code{\link{daisy}} or \code{\link{dist}}. Also a vector of length n*(n-1)/2 is allowed (where n is the number of observations), and will be interpreted in the same way as the output of the above-mentioned functions. Missing values (NAs) are not allowed. } \item{k}{integer giving the desired number of clusters. 
It is required that \eqn{0 < k < n/2} where \eqn{n} is the number of observations.} \item{diss}{ logical flag: if TRUE (default for \code{dist} or \code{dissimilarity} objects), then \code{x} is assumed to be a dissimilarity matrix. If FALSE, then \code{x} is treated as a matrix of observations by variables. } \item{memb.exp}{number \eqn{r} strictly larger than 1 specifying the \emph{membership exponent} used in the fit criterion; see the \sQuote{Details} below. Default: \code{2} which used to be hardwired inside FANNY.} \item{metric}{character string specifying the metric to be used for calculating dissimilarities between observations. Options are \code{"euclidean"} (default), \code{"manhattan"}, and \code{"SqEuclidean"}. Euclidean distances are root sum-of-squares of differences, and manhattan distances are the sum of absolute differences, and \code{"SqEuclidean"}, the \emph{squared} euclidean distances are sum-of-squares of differences. Using this last option is equivalent (but somewhat slower) to computing so called \dQuote{fuzzy C-means}. \cr If \code{x} is already a dissimilarity matrix, then this argument will be ignored. } \item{stand}{logical; if true, the measurements in \code{x} are standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation. If \code{x} is already a dissimilarity matrix, then this argument will be ignored.} \item{iniMem.p}{numeric \eqn{n \times k}{n * k} matrix or \code{NULL} (by default); can be used to specify a starting \code{membership} matrix, i.e., a matrix of non-negative numbers, each row summing to one. } %% FIXME: add example \item{cluster.only}{logical; if true, no silhouette information will be computed and returned, see details.}%% FIXME: add example \item{keep.diss, keep.data}{logicals indicating if the dissimilarities and/or input data \code{x} should be kept in the result. 
Setting these to \code{FALSE} can give smaller results and hence also save memory allocation \emph{time}.} \item{maxit, tol}{maximal number of iterations and default tolerance for convergence (relative convergence of the fit criterion) for the FANNY algorithm. The defaults \code{maxit = 500} and \code{tol = 1e-15} used to be hardwired inside the algorithm.} \item{trace.lev}{integer specifying a trace level for printing diagnostics during the C-internal algorithm. Default \code{0} does not print anything; higher values print increasingly more.} } \value{ an object of class \code{"fanny"} representing the clustering. See \code{\link{fanny.object}} for details. } \details{ In a fuzzy clustering, each observation is \dQuote{spread out} over the various clusters. Denote by \eqn{u_{iv}}{u(i,v)} the membership of observation \eqn{i} to cluster \eqn{v}. The memberships are nonnegative, and for a fixed observation i they sum to 1. The particular method \code{fanny} stems from chapter 4 of Kaufman and Rousseeuw (1990) (see the references in \code{\link{daisy}}) and has been extended by Martin Maechler to allow user specified \code{memb.exp}, \code{iniMem.p}, \code{maxit}, \code{tol}, etc. Fanny aims to minimize the objective function \deqn{\sum_{v=1}^k \frac{\sum_{i=1}^n\sum_{j=1}^n u_{iv}^r u_{jv}^r d(i,j)}{ 2 \sum_{j=1}^n u_{jv}^r}}{% SUM_[v=1..k] (SUM_(i,j) u(i,v)^r u(j,v)^r d(i,j)) / (2 SUM_j u(j,v)^r)} where \eqn{n} is the number of observations, \eqn{k} is the number of clusters, \eqn{r} is the membership exponent \code{memb.exp} and \eqn{d(i,j)} is the dissimilarity between observations \eqn{i} and \eqn{j}. \cr Note that \eqn{r \to 1}{r -> 1} gives increasingly crisper clusterings whereas \eqn{r \to \infty}{r -> Inf} leads to complete fuzziness. K&R(1990), p.191 note that values too close to 1 can lead to slow convergence. Further note that even the default, \eqn{r = 2} can lead to complete fuzziness, i.e., memberships \eqn{u_{iv} \equiv 1/k}{u(i,v) == 1/k}.
In that case a warning is signalled and the user is advised to choose a smaller \code{memb.exp} (\eqn{=r}). Compared to other fuzzy clustering methods, \code{fanny} has the following features: (a) it also accepts a dissimilarity matrix; (b) it is more robust to the \code{spherical cluster} assumption; (c) it provides a novel graphical display, the silhouette plot (see \code{\link{plot.partition}}). } \seealso{ \code{\link{agnes}} for background and references; \code{\link{fanny.object}}, \code{\link{partition.object}}, \code{\link{plot.partition}}, \code{\link{daisy}}, \code{\link{dist}}. } \examples{ ## generate 10+15 objects in two clusters, plus 3 objects lying ## between those clusters. x <- rbind(cbind(rnorm(10, 0, 0.5), rnorm(10, 0, 0.5)), cbind(rnorm(15, 5, 0.5), rnorm(15, 5, 0.5)), cbind(rnorm( 3,3.2,0.5), rnorm( 3,3.2,0.5))) fannyx <- fanny(x, 2) ## Note that observations 26:28 are "fuzzy" (closer to # 2): fannyx summary(fannyx) plot(fannyx) (fan.x.15 <- fanny(x, 2, memb.exp = 1.5)) # 'crispier' for obs. 26:28 (fanny(x, 2, memb.exp = 3)) # more fuzzy in general data(ruspini) f4 <- fanny(ruspini, 4) stopifnot(rle(f4$clustering)$lengths == c(20,23,17,15)) plot(f4, which = 1) ## Plot similar to Figure 6 in Struyf et al (1996) plot(fanny(ruspini, 5)) } \keyword{cluster} cluster/man/twins.object.Rd0000644000175100001440000000134310370161217015423 0ustar hornikusers\name{twins.object} \alias{twins.object} \alias{twins}% == class \title{Hierarchical Clustering Object} \description{ The objects of class \code{"twins"} represent an agglomerative or divisive (polythetic) hierarchical clustering of a dataset. } \section{GENERATION}{ This class of objects is returned from \code{agnes} or \code{diana}. } \section{METHODS}{ The \code{"twins"} class has a method for the following generic function: \code{pltree}. } \section{INHERITANCE}{ The following classes inherit from class \code{"twins"} : \code{"agnes"} and \code{"diana"}.
} \value{ See \code{\link{agnes.object}} and \code{\link{diana.object}} for details. } \seealso{\code{\link{agnes}},\code{\link{diana}}. } \keyword{cluster} cluster/man/diana.Rd0000644000175100001440000001571512402104635014075 0ustar hornikusers\name{diana} \title{DIvisive ANAlysis Clustering} \alias{diana} \alias{diana.object} \description{ Computes a divisive hierarchical clustering of the dataset returning an object of class \code{diana}. } \usage{ diana(x, diss = inherits(x, "dist"), metric = "euclidean", stand = FALSE, keep.diss = n < 100, keep.data = !diss, trace.lev = 0) } \arguments{ \item{x}{ data matrix or data frame, or dissimilarity matrix or object, depending on the value of the \code{diss} argument. In case of a matrix or data frame, each row corresponds to an observation, and each column corresponds to a variable. All variables must be numeric. Missing values (\code{\link{NA}}s) \emph{are} allowed. In case of a dissimilarity matrix, \code{x} is typically the output of \code{\link{daisy}} or \code{\link{dist}}. Also a vector of length n*(n-1)/2 is allowed (where n is the number of observations), and will be interpreted in the same way as the output of the above-mentioned functions. Missing values (NAs) are \emph{not} allowed. } \item{diss}{ logical flag: if TRUE (default for \code{dist} or \code{dissimilarity} objects), then \code{x} will be considered as a dissimilarity matrix. If FALSE, then \code{x} will be considered as a matrix of observations by variables. } \item{metric}{ character string specifying the metric to be used for calculating dissimilarities between observations.\cr The currently available options are "euclidean" and "manhattan". Euclidean distances are root sum-of-squares of differences, and manhattan distances are the sum of absolute differences. If \code{x} is already a dissimilarity matrix, then this argument will be ignored. 
} \item{stand}{logical; if true, the measurements in \code{x} are standardized before calculating the dissimilarities. Measurements are standardized for each variable (column), by subtracting the variable's mean value and dividing by the variable's mean absolute deviation. If \code{x} is already a dissimilarity matrix, then this argument will be ignored.} \item{keep.diss, keep.data}{logicals indicating if the dissimilarities and/or input data \code{x} should be kept in the result. Setting these to \code{FALSE} can give much smaller results and hence even save memory allocation \emph{time}.} \item{trace.lev}{integer specifying a trace level for printing diagnostics during the algorithm. Default \code{0} does not print anything; higher values print increasingly more.} } \value{ an object of class \code{"diana"} representing the clustering; this class has methods for the following generic functions: \code{print}, \code{summary}, \code{plot}. Further, the class \code{"diana"} inherits from \code{"twins"}. Therefore, the generic function \code{\link{pltree}} can be used on a \code{diana} object, and \code{\link{as.hclust}} and \code{\link{as.dendrogram}} methods are available. A legitimate \code{diana} object is a list with the following components: \item{order}{ a vector giving a permutation of the original observations to allow for plotting, in the sense that the branches of a clustering tree will not cross. } \item{order.lab}{ a vector similar to \code{order}, but containing observation labels instead of observation numbers. This component is only available if the original observations were labelled. } \item{height}{a vector with the diameters of the clusters prior to splitting. } \item{dc}{ the divisive coefficient, measuring the clustering structure of the dataset. For each observation i, denote by \eqn{d(i)} the diameter of the last cluster to which it belongs (before being split off as a single observation), divided by the diameter of the whole dataset. 
The \code{dc} is the average of all \eqn{1 - d(i)}. It can also be seen as the average width (or the percentage filled) of the banner plot. Because \code{dc} grows with the number of observations, this measure should not be used to compare datasets of very different sizes. } \item{merge}{ an (n-1) by 2 matrix, where n is the number of observations. Row i of \code{merge} describes the split at step n-i of the clustering. If a number \eqn{j} in row r is negative, then the single observation \eqn{|j|} is split off at stage n-r. If j is positive, then the cluster that will be split at stage n-j (described by row j), is split off at stage n-r. } \item{diss}{ an object of class \code{"dissimilarity"}, representing the total dissimilarity matrix of the dataset. } \item{data}{ a matrix containing the original or standardized measurements, depending on the \code{stand} option of the function \code{diana}. If a dissimilarity matrix was given as input structure, then this component is not available. } } \details{ \code{diana} is fully described in chapter 6 of Kaufman and Rousseeuw (1990). It is probably unique in computing a divisive hierarchy, whereas most other software for hierarchical clustering is agglomerative. Moreover, \code{diana} provides (a) the divisive coefficient (see \code{diana.object}) which measures the amount of clustering structure found; and (b) the banner, a novel graphical display (see \code{plot.diana}). The \code{diana}-algorithm constructs a hierarchy of clusterings, starting with one large cluster containing all n observations. Clusters are divided until each cluster contains only a single observation.\cr At each stage, the cluster with the largest diameter is selected.
(The diameter of a cluster is the largest dissimilarity between any two of its observations.)\cr To divide the selected cluster, the algorithm first looks for its most disparate observation (i.e., which has the largest average dissimilarity to the other observations of the selected cluster). This observation initiates the "splinter group". In subsequent steps, the algorithm reassigns observations that are closer to the "splinter group" than to the "old party". The result is a division of the selected cluster into two new clusters. } \seealso{ \code{\link{agnes}} also for background and references; \code{\link{cutree}} (and \code{\link{as.hclust}}) for grouping extraction; \code{\link{daisy}}, \code{\link{dist}}, \code{\link{plot.diana}}, \code{\link{twins.object}}. } \examples{ data(votes.repub) dv <- diana(votes.repub, metric = "manhattan", stand = TRUE) print(dv) plot(dv) ## Cut into 2 groups: dv2 <- cutree(as.hclust(dv), k = 2) table(dv2) # 8 and 42 group members rownames(votes.repub)[dv2 == 1] ## For two groups, does the metric matter ? dv0 <- diana(votes.repub, stand = TRUE) # default: Euclidean dv.2 <- cutree(as.hclust(dv0), k = 2) table(dv2 == dv.2)## identical group assignments str(as.dendrogram(dv0)) # {via as.dendrogram.twins() method} data(agriculture) ## Plot similar to Figure 8 in ref \dontrun{plot(diana(agriculture), ask = TRUE)} \dontshow{plot(diana(agriculture))} } \keyword{cluster} cluster/man/ruspini.Rd0000644000175100001440000000136311711740461014511 0ustar hornikusers\name{ruspini} \alias{ruspini} \title{Ruspini Data} \usage{data(ruspini)} \description{ The Ruspini data set, consisting of 75 points in four groups that is popular for illustrating clustering techniques. } \format{ A data frame with 75 observations on 2 variables giving the x and y coordinates of the points, respectively. } \source{ E. H. Ruspini (1970) Numerical methods for fuzzy clustering. \emph{Inform. Sci.} \bold{2}, 319--350. } \references{ see those in \code{\link{agnes}}. 
} \examples{ data(ruspini) ## Plot similar to Figure 4 in Struyf et al (1996) \dontrun{plot(pam(ruspini, 4), ask = TRUE)} \dontshow{plot(pam(ruspini, 4))} ## Plot similar to Figure 6 in Struyf et al (1996) plot(fanny(ruspini, 5)) } \keyword{datasets} cluster/man/mona.Rd0000644000175100001440000000733212310033665013751 0ustar hornikusers\name{mona} \alias{mona} \title{MONothetic Analysis Clustering of Binary Variables} \description{ Returns a list representing a divisive hierarchical clustering of a dataset with binary variables only. } \usage{ mona(x) } \arguments{ \item{x}{ data matrix or data frame in which each row corresponds to an observation, and each column corresponds to a variable. All variables must be binary. A limited number of missing values (NAs) is allowed. Every observation must have at least one value different from NA. No variable should have half of its values missing. There must be at least one variable which has no missing values. A variable with all its non-missing values identical, is not allowed. } } \value{ an object of class \code{"mona"} representing the clustering. See \code{mona.object} for details. } \details{ \code{mona} is fully described in chapter 7 of Kaufman and Rousseeuw (1990). It is "monothetic" in the sense that each division is based on a single (well-chosen) variable, whereas most other hierarchical methods (including \code{agnes} and \code{diana}) are "polythetic", i.e. they use all variables together. The \code{mona}-algorithm constructs a hierarchy of clusterings, starting with one large cluster. Clusters are divided until all observations in the same cluster have identical values for all variables.\cr At each stage, all clusters are divided according to the values of one variable. A cluster is divided into one cluster with all observations having value 1 for that variable, and another cluster with all observations having value 0 for that variable.
The variable used for splitting a cluster is the variable with the maximal total association to the other variables, according to the observations in the cluster to be split. The association between variables f and g is given by a(f,g)*d(f,g) - b(f,g)*c(f,g), where a(f,g), b(f,g), c(f,g), and d(f,g) are the numbers in the contingency table of f and g. [That is, a(f,g) (resp. d(f,g)) is the number of observations for which f and g both have value 0 (resp. value 1); b(f,g) (resp. c(f,g)) is the number of observations for which f has value 0 (resp. 1) and g has value 1 (resp. 0).] The total association of a variable f is the sum of its associations to all variables. This algorithm does not work with missing values, therefore the data are revised, e.g. all missing values are filled in. To do this, the same measure of association between variables is used as in the algorithm. When variable f has missing values, the variable g with the largest absolute association to f is looked up. When the association between f and g is positive, any missing value of f is replaced by the value of g for the same observation. If the association between f and g is negative, then any missing value of f is replaced by the value of 1-g for the same observation. } \seealso{ \code{\link{agnes}} for background and references; \code{\link{mona.object}}, \code{\link{plot.mona}}. } \examples{ data(animals) ma <- mona(animals) ma ## Plot similar to Figure 10 in Struyf et al (1996) plot(ma) ## One place to see if/how error messages are *translated* (to 'de' / 'pl'): ani.NA <- animals; ani.NA[4,] <- NA aniNA <- within(animals, { end[2:9] <- NA }) if(getRversion() >= 3.0) { aniN2 <- animals; aniN2[cbind(1:6, c(3, 1, 4:6, 2))] <- NA } ani.non2 <- within(animals, end[7] <- 3 ) ani.idNA <- within(animals, end[!is.na(end)] <- 1 ) try( mona(ani.NA) ) ## error: .. object with all values missing try( mona(aniNA) ) ## error: ..
more than half missing values if(getRversion() >= 3.0) try( mona(aniN2) ) ## error: all have at least one missing try( mona(ani.non2) ) ## error: all must be binary try( mona(ani.idNA) ) ## error: ditto } \keyword{cluster} cluster/INDEX0000644000175100001440000000526710366141463012561 0ustar hornikusersagnes Agglomerative Nesting clara Clustering Large Applications daisy Dissimilarity Matrix Calculation diana DIvisive ANAlysis Clustering fanny Fuzzy Analysis Clustering mona MONothetic Analysis Clustering of Binary Variables pam Partitioning Around Medoids dissimilarity.object Dissimilarity Matrix Object partition.object Partitioning Object twins.object Hierarchical Clustering Object agnes.object Agglomerative Nesting (AGNES) Object clara.object Clustering Large Applications (CLARA) Object diana.object Divisive Analysis (DIANA) Object fanny.object Fuzzy Analysis (FANNY) Object mona.object Monothetic Analysis (MONA) Object pam.object Partitioning Around Medoids (PAM) Object sizeDiss Sample Size of Dissimilarity Like Object clusplot Cluster Plot - Generic Function clusplot.default Bivariate Cluster Plot (Clusplot) Default Method clusplot.partition Bivariate Clusplot of a Partitioning Object coef.hclust Agglomerative Coefficient for 'hclust' Objects pltree Clustering Trees - Generic Function pltree.twins Clustering Tree of a Hierarchical Clustering bannerplot Plot Banner (of Hierarchical Clustering) silhouette Compute or Extract Silhouette Information from Clustering ellipsoidhull Compute the Ellipsoid Hull or Spanning Ellipsoid of a Point Set predict.ellipsoid Predict Method for Ellipsoid Objects volume.ellipsoid Compute the Volume of Planar Object lower.to.upper.tri.inds Permute Indices for Triangular Matrices plot.agnes Plots of an Agglomerative Hierarchical Clustering plot.diana Plots of a Divisive Hierarchical Clustering plot.mona Banner of Monothetic Divisive Hierarchical Clusterings plot.partition Plot of a Partition of the Data Set print.dissimilarity Print and 
Summary Methods for Dissimilarity Objects print.agnes Print Method for AGNES Objects print.clara Print Method for CLARA Objects print.diana Print Method for DIANA Objects print.fanny Print Method for FANNY Objects print.mona Print Method for MONA Objects print.pam Print Method for PAM Objects summary.agnes Summary Method for 'agnes' Objects summary.clara Summary Method for 'clara' Objects summary.diana Summary Method for 'diana' Objects summary.fanny Summary Method for 'fanny' Objects summary.mona Summary Method for 'mona' Objects summary.pam Summary Method for PAM Objects cluster-internal Internal cluster functions DATASETS agriculture European Union Agricultural Workforces animals Attributes of Animals flower Flower Characteristics pluton Isotopic Composition Plutonium Batches ruspini Ruspini Data votes.repub Votes for Republican Candidate in Presidential Elections xclara Bivariate Data Set with 3 Clusters