statistics-1.3.0/COPYING0000755000000000000000000001243612776476211013114 0ustar 00000000000000inst/private/tbl_delim.m GPLv3+ inst/anderson_darling_cdf.m public domain inst/anderson_darling_test.m public domain inst/anovan.m GPLv3+ inst/bbscdf.m GPLv3+ inst/bbsinv.m GPLv3+ inst/bbspdf.m GPLv3+ inst/bbsrnd.m GPLv3+ inst/betastat.m GPLv3+ inst/binostat.m GPLv3+ inst/binotest.m GPLv3+ inst/boxplot.m GPLv3+ inst/burrcdf.m GPLv3+ inst/burrinv.m GPLv3+ inst/burrpdf.m GPLv3+ inst/burrrnd.m GPLv3+ inst/caseread.m GPLv3+ inst/casewrite.m GPLv3+ inst/cdf.m GPLv3+ inst/chi2stat.m GPLv3+ inst/cl_multinom.m GPLv3+ inst/cmdscale.m GPLv3+ inst/combnk.m GPLv3+ inst/copulacdf.m GPLv3+ inst/copulapdf.m GPLv3+ inst/copularnd.m GPLv3+ inst/crossval.m GPLv3+ inst/dcov.m GPLv3+ inst/dendrogram.m GPLv3+ inst/expstat.m GPLv3+ inst/ff2n.m public domain inst/fstat.m GPLv3+ inst/fullfact.m public domain inst/gamfit.m public domain inst/gamlike.m public domain inst/gamstat.m GPLv3+ inst/geomean.m GPLv3+ inst/geostat.m GPLv3+ inst/gevcdf.m GPLv3+ inst/gevfit_lmom.m GPLv3+ inst/gevfit.m GPLv3+ inst/gevinv.m GPLv3+ inst/gevlike.m GPLv3+ inst/gevpdf.m GPLv3+ inst/gevrnd.m GPLv3+ inst/gevstat.m GPLv3+ inst/gpcdf.m GPLv3+ inst/gpinv.m GPLv3+ inst/gppdf.m GPLv3+ inst/gprnd.m GPLv3+ inst/grp2idx.m GPLv3+ inst/harmmean.m GPLv3+ inst/hist3.m GPLv3+ inst/histfit.m GPLv3+ inst/hmmestimate.m GPLv3+ inst/hmmgenerate.m GPLv3+ inst/hmmviterbi.m GPLv3+ inst/iwishpdf.m GPLv3+ inst/iwishrnd.m GPLv3+ inst/hygestat.m GPLv3+ inst/jackknife.m GPLv3+ inst/jsucdf.m GPLv3+ inst/jsupdf.m GPLv3+ inst/kmeans.m GPLv3+ inst/linkage.m GPLv3+ inst/lognstat.m GPLv3+ inst/mad.m GPLv3+ inst/mahal.m GPLv3+ inst/mnpdf.m GPLv3+ inst/mnrnd.m GPLv3+ inst/monotone_smooth.m GPLv3+ inst/mvncdf.m GPLv3+ inst/mvnpdf.m public domain inst/mvnrnd.m GPLv3+ inst/mvtcdf.m GPLv3+ inst/mvtpdf.m GPLv3+ inst/mvtrnd.m GPLv3+ inst/nakacdf.m GPLv3+ inst/nakainv.m GPLv3+ inst/nakapdf.m GPLv3+ inst/nakarnd.m GPLv3+ inst/nanmax.m GPLv3+ inst/nanmean.m 
GPLv3+ inst/nanmedian.m GPLv3+ inst/nanmin.m GPLv3+ inst/nanstd.m GPLv3+ inst/nansum.m GPLv3+ inst/nanvar.m GPLv3+ inst/nbinstat.m GPLv3+ inst/normalise_distribution.m GPLv3+ inst/normplot.m public domain inst/normstat.m GPLv3+ inst/pcacov.m GPLv3+ inst/pcares.m GPLv3+ inst/pdf.m GPLv3+ inst/pdist.m GPLv3+ inst/pdist2.m GPLv3+ inst/plsregress.m GPLv3+ inst/poisstat.m GPLv3+ inst/princomp.m public domain inst/qrandn.m GPLv3+ inst/random.m GPLv3+ inst/randsample.m GPLv3+ inst/raylcdf.m GPLv3+ inst/raylinv.m GPLv3+ inst/raylpdf.m GPLv3+ inst/raylrnd.m GPLv3+ inst/raylstat.m GPLv3+ inst/regress_gp.m GPLv3+ inst/regress.m GPLv3+ inst/repanova.m GPLv3+ inst/runstest.m GPLv3+ inst/signtest.m GPLv3+ inst/squareform.m GPLv3+ inst/stepwisefit.m GPLv3+ inst/tabulate.m GPLv3+ inst/tblread.m GPLv3+ inst/tblwrite.m GPLv3+ inst/tricdf.m GPLv3+ inst/triinv.m GPLv3+ inst/trimmean.m GPLv3+ inst/tripdf.m GPLv3+ inst/trirnd.m GPLv3+ inst/tstat.m GPLv3+ inst/ttest.m GPLv3+ inst/ttest2.m GPLv3+ inst/unidstat.m GPLv3+ inst/unifstat.m GPLv3+ inst/vartest.m GPLv3+ inst/vartest2.m GPLv3+ inst/violin.m GPLv3+ inst/vmpdf.m GPLv3+ inst/vmrnd.m GPLv3+ inst/wblstat.m GPLv3+ inst/wishpdf.m GPLv3+ inst/wishrnd.m GPLv3+ inst/ztest.m GPLv3+ statistics-1.3.0/DESCRIPTION0000755000000000000000000000047212776476211013564 0ustar 00000000000000Name: statistics Version: 1.3.0 Date: 2016-10-09 Author: various authors Maintainer: Arno Onken Title: Statistics Description: Additional statistics functions for Octave. 
Categories: Statistics Depends: octave (>= 4.0.0), io (>= 1.0.18) License: GPLv3+, public domain Url: http://octave.sf.net statistics-1.3.0/INDEX0000755000000000000000000000315212776476211012646 0ustar 00000000000000statistics >> Statistics Distributions anderson_darling_cdf bbscdf bbsinv bbspdf bbsrnd betastat binostat binotest burrcdf burrinv burrpdf burrrnd cdf chi2stat cl_multinom copulacdf copulapdf copularnd expstat fstat gamlike gamstat geostat gevcdf gevfit gevfit_lmom gevinv gevlike gevpdf gevrnd gevstat gpcdf gpinv gppdf gprnd hygestat iwishpdf iwishrnd jsucdf jsupdf lognstat mvnpdf mvnrnd mvncdf mnpdf mnrnd mvtcdf mvtpdf mvtrnd nakacdf nakainv nakapdf nakarnd nbinstat normalise_distribution normstat pdf poisstat qrandn random randsample raylcdf raylinv raylpdf raylrnd raylstat tstat tricdf triinv tripdf trirnd unidstat unifstat vmpdf vmrnd wblstat wishpdf wishrnd Descriptive statistics combnk dcov geomean harmmean jackknife mad nanmax nanmean nanmedian nanmin nanstd nansum nanvar trimmean tabulate Experimental design fullfact ff2n Regression anovan crossval monotone_smooth princomp pcares pcacov plsregress regress regress_gp stepwisefit Plots boxplot dendrogram histfit hist3 normplot repanova violin Models hmmestimate hmmgenerate hmmviterbi Hypothesis testing anderson_darling_test runstest signtest ttest ttest2 vartest vartest2 ztest Fitting gamfit Clustering cmdscale kmeans linkage mahal pdist pdist2 squareform Reading and Writing caseread casewrite tblread tblwrite Cvpartition (class of set partitions for cross-validation, used in crossval) @cvpartition/cvpartition @cvpartition/display @cvpartition/get @cvpartition/repartition @cvpartition/set @cvpartition/test @cvpartition/training Categorical data grp2idx statistics-1.3.0/Makefile0000755000000000000000000000507612776476211013523 0ustar 00000000000000## Copyright 2015-2016 Carnë Draug ## Copyright 2015-2016 Oliver Heimlich ## ## Copying and distribution of this file, with or without modification, ## 
are permitted in any medium without royalty provided the copyright ## notice and this notice are preserved. This file is offered as-is, ## without any warranty. PACKAGE = $(shell grep "^Name: " DESCRIPTION | cut -f2 -d" ") VERSION = $(shell grep "^Version: " DESCRIPTION | cut -f2 -d" ") RELEASE_DIR = $(PACKAGE)-$(VERSION) RELEASE_TARBALL = $(PACKAGE)-$(VERSION).tar.gz HTML_DIR = $(PACKAGE)-html HTML_TARBALL = $(PACKAGE)-html.tar.gz M_SOURCES = $(wildcard inst/*.m) PKG_ADD = $(shell grep -Pho '(?<=// PKG_ADD: ).*' $(M_SOURCES)) OCTAVE ?= octave .PHONY: help dist html release install check run clean help: @echo "Targets:" @echo " dist - Create $(RELEASE_TARBALL) for release" @echo " html - Create $(HTML_TARBALL) for release" @echo " release - Create both of the above and show md5sums" @echo @echo " install - Install the package in GNU Octave" @echo " check - Execute package tests (w/o install)" @echo " run - Run Octave with development in PATH (no install)" @echo @echo " clean - Remove releases, html documentation, and oct files" $(RELEASE_DIR): .hg/dirstate @echo "Creating package version $(VERSION) release ..." -rm -rf "$@" hg archive --exclude ".hg*" --exclude "Makefile" --type files "$@" chmod -R a+rX,u+w,go-w "$@" $(RELEASE_TARBALL): $(RELEASE_DIR) tar cf - --posix "$<" | gzip -9n > "$@" $(HTML_DIR): install @echo "Generating HTML documentation. This may take a while ..." -rm -rf "$@" $(OCTAVE) --silent \ --eval "pkg load generate_html; " \ --eval "pkg load $(PACKAGE);" \ --eval 'generate_package_html ("${PACKAGE}", "$@", "octave-forge");' chmod -R a+rX,u+w,go-w "$@" $(HTML_TARBALL): $(HTML_DIR) tar cf - --posix "$<" | gzip -9n > "$@" dist: $(RELEASE_TARBALL) html: $(HTML_TARBALL) release: dist html md5sum $(RELEASE_TARBALL) $(HTML_TARBALL) @echo "Upload @ https://sourceforge.net/p/octave/package-releases/new/" @echo 'Execute: hg tag "release-${VERSION}"' install: $(RELEASE_TARBALL) @echo "Installing package locally ..." 
$(OCTAVE) --silent --eval 'pkg ("install", "${RELEASE_TARBALL}")' check: $(OCTAVE) --silent \ --eval 'addpath (fullfile ([pwd filesep "inst"]));' \ --eval '${PKG_ADD}' \ --eval 'runtests ("inst");' run: $(OCTAVE) --no-gui --silent --persist \ --eval 'addpath (fullfile ([pwd filesep "inst"]));' \ --eval '${PKG_ADD}' clean: rm -rf $(RELEASE_DIR) $(RELEASE_TARBALL) $(HTML_TARBALL) $(HTML_DIR) statistics-1.3.0/NEWS0000755000000000000000000001337612776476211012564 0ustar 00000000000000Summary of important user-visible changes for statistics 1.3.0: ------------------------------------------------------------------- ** The following functions are new: bbscdf bbsinv bbspdf bbsrnd binotest burrcdf burrinv burrpdf burrrnd gpcdf gpinv gppdf gprnd grp2idx mahal mvtpdf nakacdf nakainv nakapdf nakarnd pdf tricdf triinv tripdf trirnd violin ** Other functions that have been changed for smaller bugfixes, increased Matlab compatibility, or performance: betastat binostat cdf combnk gevfit hist3 kmeans linkage randsample squareform ttest Summary of important user-visible changes for statistics 1.2.4: ------------------------------------------------------------------- ** Made princomp work with nargout < 2. ** Renamed dendogram to dendrogram. ** Added isempty check to kmeans. ** Transposed output of hist3. ** Converted calculation in hmmviterbi to log space. ** Bug fixes for stepwisefit wishrnd. ** Rewrite of cmdscale for improved compatibility. ** Fix in squareform for improved compatibility. ** New cvpartition class, with methods: display repartition test training ** New sample data file fisheriris.txt for tests ** The following functions are new: cdf crossval dcov pdist2 qrandn randsample signtest ttest ttest2 vartest vartest2 ztest Summary of important user-visible changes for statistics 1.2.3: ------------------------------------------------------------------- ** Made sure that output of nanstd is real. ** Fixed second output of nanmax and nanmin. 
** Corrected handle for outliers in boxplot. ** Bug fix and enhanced functionality for mvnrnd. ** The following functions are new: wishrnd iwishrnd wishpdf iwishpdf cmdscale Summary of important user-visible changes for statistics 1.2.2: ------------------------------------------------------------------- ** Fixed documentation of dendogram and hist3 to work with TexInfo 5. Summary of important user-visible changes for statistics 1.2.1: ------------------------------------------------------------------- ** The following functions are new: pcares pcacov runstest stepwisefit hist3 ** dendogram now returns the leaf node numbers and order that the nodes were displayed in. ** New faster implementation of princomp. Summary of important user-visible changes for statistics 1.2.0: ------------------------------------------------------------------- ** The following functions are new: regress_gp dendogram plsregress ** New functions for the generalized extreme value (GEV) distribution: gevcdf gevfit gevfit_lmom gevinv gevlike gevpdf gevrnd gevstat ** The interface of the following functions has been modified: mvnrnd ** `kmeans' has been fixed to deal with clusters that contain only one element. ** `normplot' has been fixed to avoid use of functions that have been removed from Octave core. Also, the plot produced should now display some aesthetic elements and appropriate legends. ** The help text of `mvtrnd' has been improved. ** Package is no longer autoloaded. Summary of important user-visible changes for statistics 1.1.3: ------------------------------------------------------------------- ** The following functions are new in 1.1.3: copularnd mvtrnd ** The functions mnpdf and mnrnd are now also usable for greater numbers of categories for which the rows do not exactly sum to 1. 
Summary of important user-visible changes for statistics 1.1.2: ------------------------------------------------------------------- ** The following functions are new in 1.1.2: mnpdf mnrnd ** The package is now dependent on the io package (version 1.0.18 or later) since the functions that it depended on from the miscellaneous package have been moved to io. ** The function `kmeans' now accepts the 'emptyaction' property with the 'singleton' value. This allows for the kmeans algorithm to handle empty clusters better. It also throws an error if the user does not request empty cluster handling, and there is an empty cluster. Plus, the returned items are now a closer match to Matlab. Summary of important user-visible changes for statistics 1.1.1: ------------------------------------------------------------------- ** The following functions are new in 1.1.1: monotone_smooth kmeans jackknife ** Bug fixes on the functions: normalise_distribution combnk repanova ** The following functions were removed since equivalents are now part of GNU octave core: zscore ** boxplot.m now returns a structure with handles to the plot elements. Summary of important user-visible changes for statistics 1.1.0: ------------------------------------------------------------------- ** IMPORTANT note about `fstat' shadowing core library function: GNU octave's 3.2 release added a new function `fstat' to return information about a file. Statistics' `fstat' computes F mean and variance. Since MatLab's `fstat' is the equivalent to statistics' `fstat' (not to core's `fstat'), and to avoid problems with the statistics package, `fstat' has been deprecated in octave 3.4 and will be removed in Octave 3.8. In the mean time, please ignore this warning when installing the package. ** The following functions are new in 1.1.0: normalise_distribution repanova combnk ** The following functions were removed since equivalents are now part of GNU octave core: prctile ** The __tbl_delim__ function is now private. 
** The function `boxplot' now accepts named arguments. ** Bug fixes on the functions: harmmean nanmax nanmin regress ** Small improvements on help text. statistics-1.3.0/inst/@cvpartition/cvpartition.m0000755000000000000000000001406512776476211020220 0ustar 00000000000000## Copyright (C) 2014 Nir Krakauer ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program; If not, see . ## -*- texinfo -*- ## @deftypefn{Function File}{@var{C} =} cvpartition (@var{X}, [@var{partition_type}, [@var{k}]]) ## Create a partition object for cross validation. ## ## @var{X} may be a positive integer, interpreted as the number of values @var{n} to partition, or a vector of length @var{n} containing class designations for the elements, in which case the partitioning types @var{KFold} and @var{HoldOut} attempt to ensure each partition represents the classes proportionately. ## ## @var{partition_type} must be one of the following: ## ## @table @asis ## @item @samp{KFold} ## Divide set into @var{k} equal-size subsets (this is the default, with @var{k}=10). ## @item @samp{HoldOut} ## Divide set into two subsets, "training" and "validation". If @var{k} is a fraction, that is the fraction of values put in the validation subset; if it is a positive integer, that is the number of values in the validation subset (by default @var{k}=0.1). ## @item @samp{LeaveOut} ## Leave-one-out partition (each element is placed in its own subset). 
## @item @samp{resubstitution}
## Training and validation subsets that both contain all the original elements.
## @item @samp{Given}
## Subset indices are as given in @var{X}.
## @end table
##
## The following fields are defined for the @samp{cvpartition} class:
##
## @table @asis
## @item @samp{classes}
## Class designations for the elements.
## @item @samp{inds}
## Subset indices for the elements.
## @item @samp{n_classes}
## Number of different classes.
## @item @samp{NumObservations}
## @var{n}, number of elements in data set.
## @item @samp{NumTestSets}
## Number of testing subsets.
## @item @samp{TestSize}
## Number of elements in (each) testing subset.
## @item @samp{TrainSize}
## Number of elements in (each) training subset.
## @item @samp{Type}
## Partition type.
## @end table
##
## @seealso{crossval}
## @end deftypefn

## Author: Nir Krakauer

function C = cvpartition (X, partition_type = 'KFold', k = [])

  if (nargin < 1 || nargin > 3 || !isvector(X))
    print_usage ();
  endif

  ## A scalar X is the number of observations; a vector X carries one label
  ## per observation (class labels, or subset indices for 'Given').
  if isscalar (X)
    n = X;
    n_classes = 1;
  else
    n = numel (X);
  endif

  ## Unknown partition types fall back to K-fold with a warning.
  switch tolower(partition_type)
    case {'kfold' 'holdout' 'leaveout' 'resubstitution' 'given'}
    otherwise
      warning ('unrecognized type, using KFold')
      partition_type = 'KFold';
  endswitch

  ## For the types that use labels, map them to integers 1..n_classes (j)
  ## and count the members of each class.
  switch tolower(partition_type)
    case {'kfold' 'holdout' 'given'}
      if !isscalar (X)
        [~, ~, j] = unique (X(:));
        n_per_class = accumarray (j, 1);
        n_classes = numel (n_per_class);
      elseif strcmpi (partition_type, 'given')
        ## Without a label vector there are no subset indices to adopt;
        ## previously this failed later on an undefined variable.
        error ("cvpartition: 'Given' requires X to be a vector of subset indices");
      endif
  endswitch

  C = struct ("classes", [], "inds", [], "n_classes", [], "NumObservations", [], "NumTestSets", [], "TestSize", [], "TrainSize", [], "Type", []);
  #The non-Matlab fields classes, inds, n_classes are only useful for some methods

  switch tolower(partition_type)
    case 'kfold'
      if isempty (k)
        k = 10;
      endif
      if n_classes == 1
        inds = floor((0:(n-1))' * (k / n)) + 1;
      else
        inds = nan(n, 1);
        for i = 1:n_classes
          if mod (i, 2) #alternate ordering over classes so that the subsets are more nearly the same size
            inds(j == i) = floor((0:(n_per_class(i)-1))' * (k / n_per_class(i))) + 1;
          else
            inds(j == i) = floor(((n_per_class(i)-1):-1:0)' * (k / n_per_class(i))) + 1;
          endif
        endfor
      endif
      C.inds = inds;
      C.NumTestSets = k;
      ## Count per fold number, keeping empty folds (possible when n < k) so
      ## that entry i of TestSize/TrainSize always describes fold i.
      n_per_subset = accumarray (inds, 1, [k, 1]);
      C.TrainSize = n - n_per_subset;
      C.TestSize = n_per_subset;
    case 'given'
      C.inds = j;
      C.NumTestSets = n_classes;
      C.TrainSize = n - n_per_class;
      C.TestSize = n_per_class;
    case 'holdout'
      if isempty (k)
        k = 0.1;
      endif
      if k < 1
        f = k; #target fraction to sample
        k = round (k * n); #number of samples
      else
        f = k / n;
      endif
      inds = zeros (n, 1, "logical");
      if n_classes == 1
        inds(randsample(n, k)) = true; #indices for test set
      else #sample from each class
        k_check = 0;
        for i = 1:n_classes
          ki = round(f*n_per_class(i));
          inds(find(j == i)(randsample(n_per_class(i), ki))) = true;
          k_check += ki;
        endfor
        if k_check < k #add random elements to test set to make it k
          inds(find(!inds)(randsample(n - k_check, k - k_check))) = true;
        elseif k_check > k #remove random elements from test set
          inds(find(inds)(randsample(k_check, k_check - k))) = false;
        endif
        C.classes = j;
      endif
      C.n_classes = n_classes;
      C.TrainSize = n - k;
      C.TestSize = k;
      C.NumTestSets = 1;
      C.inds = inds;
    case 'leaveout'
      ## Each of the n folds tests on one element and trains on the other
      ## n-1 (these two sizes were previously stored the wrong way round).
      C.TrainSize = (n-1) * ones (n, 1);
      C.TestSize = ones (n, 1);
      C.NumTestSets = n;
    case 'resubstitution'
      C.TrainSize = C.TestSize = n;
      C.NumTestSets = 1;
  endswitch

  C.NumObservations = n;
  C.Type = tolower (partition_type);

  C = class (C, "cvpartition");

endfunction

%!test
%! ## leave-one-out: every fold trains on n-1 elements and tests on 1
%! C = cvpartition (5, 'LeaveOut');
%! assert (get (C, 'TrainSize'), 4 * ones (5, 1));
%! assert (get (C, 'TestSize'), ones (5, 1));
%! assert (get (C, 'NumTestSets'), 5);

%!test
%! ## fold sizes stay aligned with fold numbers even when some folds are empty
%! C = cvpartition (3, 'KFold', 5);
%! assert (get (C, 'TestSize'), [1; 1; 0; 1; 0]);
%! assert (get (C, 'TrainSize'), [2; 2; 3; 2; 3]);

%!demo
%! # Partition with Fisher iris dataset (n = 150)
%! # Stratified by species
%! load fisheriris.txt
%! y = fisheriris(:, 1);
%! # 10-fold cross-validation partition
%! c = cvpartition (y, 'KFold', 10)
%! # leave-10-out partition
%! c1 = cvpartition (y, 'HoldOut', 10)
%! idx1 = test (c, 2);
%! idx2 = training (c, 2);
%! # another leave-10-out partition
%!
%! c2 = repartition (c1)

#plot(struct(c).inds, '*')
statistics-1.3.0/inst/@cvpartition/display.m0000755000000000000000000000265612776476211017326 0ustar 00000000000000## Copyright (C) 2014 Nir Krakauer
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; If not, see .

## -*- texinfo -*-
## @deftypefn{Function File} display (@var{C})
## Display a cvpartition object.
##
## Prints the partition type followed by the number of observations, the
## number of test sets, and the training and test subset sizes.
##
## @seealso{cvpartition}
## @end deftypefn

## Author: Nir Krakauer

function display (C)

  if (nargin != 1)
    print_usage ();
  endif

  ## Stored type names and the labels printed for them; anything else is
  ## reported as an unknown type.
  type_names = {'kfold', 'given', 'holdout', 'leaveout', 'resubstitution'};
  type_labels = {'K-fold', 'Given', 'HoldOut', 'Leave-One-Out', 'Resubstitution'};

  hit = find (strcmp (C.Type, type_names), 1);
  if (isempty (hit))
    label = 'Unknown-type';
  else
    label = type_labels{hit};
  endif

  disp ([label ' cross validation partition'])
  disp ([' N: ' num2str(C.NumObservations)])
  disp (['NumTestSets: ' num2str(C.NumTestSets)])
  ## Sizes are transposed before formatting, as in the original code.
  disp ([' TrainSize: ' num2str(C.TrainSize')])
  disp ([' TestSize: ' num2str(C.TestSize')])

endfunction
statistics-1.3.0/inst/@cvpartition/get.m0000755000000000000000000000247012776476211016432 0ustar 00000000000000## Copyright (C) 2014 Nir Krakauer
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; If not, see .

## -*- texinfo -*-
## @deftypefn {Function File} {@var{s} =} get (@var{c}, [@var{f}])
## Get a field from a @samp{cvpartition} object.
##
## With one argument, return @var{c} itself.  With two arguments, @var{f}
## must be a string naming one of the fields @samp{classes}, @samp{inds},
## @samp{n_classes}, @samp{NumObservations}, @samp{NumTestSets},
## @samp{TestSize}, @samp{TrainSize} or @samp{Type}; the value of that field
## is returned.  Any other name, or a non-string @var{f}, raises an error.
##
## @seealso{cvpartition}
## @end deftypefn

function s = get (c, f)

  if (nargin == 1)
    s = c;
  elseif (nargin == 2)
    if (ischar (f))
      switch (f)
        case {"classes", "inds", "n_classes", "NumObservations", "NumTestSets", "TestSize", "TrainSize", "Type"}
          ## Direct field lookup on the underlying struct; no need to build
          ## and eval a code string.
          s = getfield (struct (c), f);
        otherwise
          error ("get: invalid property %s", f);
      endswitch
    else
      error ("get: expecting the property to be a string");
    endif
  else
    print_usage ();
  endif

endfunction
statistics-1.3.0/inst/@cvpartition/repartition.m0000755000000000000000000000453412776476211020216 0ustar 00000000000000## Copyright (C) 2014 Nir Krakauer
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; If not, see .

## -*- texinfo -*-
## @deftypefn{Function File}{@var{Cnew} =} repartition (@var{C})
## Return a new cvpartition object.
##
## @var{C} should be a cvpartition object.
## @var{Cnew} will use the same partition_type as @var{C} but redo any randomization performed (currently, only the HoldOut type uses randomization).
##
## @seealso{cvpartition}
## @end deftypefn

## Author: Nir Krakauer

function Cnew = repartition (C)

  if (nargin < 1 || nargin > 2)
    print_usage ();
  endif

  Cnew = C;

  ## Only hold-out partitions are re-drawn; every other type is returned
  ## exactly as it came in.
  if (! strcmp (C.Type, 'holdout'))
    return;
  endif

  n_obs = C.NumObservations;
  n_test = C.TestSize;
  n_groups = C.n_classes;

  ## A stored size below 1 is read as a fraction of the data, otherwise as
  ## an absolute count.
  if (n_test < 1)
    frac = n_test;
    n_test = round (n_test * n_obs);
  else
    frac = n_test / n_obs;
  endif

  is_test = false (n_obs, 1);

  if (n_groups == 1)
    ## No stratification: draw the whole test set in one go.
    is_test(randsample (n_obs, n_test)) = true;
  else
    ## Draw from each class in proportion to its size.
    labels = C.classes;
    group_sizes = accumarray (labels, 1);
    n_groups = numel (group_sizes);
    n_drawn = 0;
    for g = 1:n_groups
      n_g = round (frac * group_sizes(g));
      members = find (labels == g);
      is_test(members(randsample (group_sizes(g), n_g))) = true;
      n_drawn += n_g;
    endfor
    ## Per-class rounding can miss the target size; add or remove random
    ## elements until the test set has exactly n_test members.
    if (n_drawn < n_test)
      candidates = find (! is_test);
      is_test(candidates(randsample (n_obs - n_drawn, n_test - n_drawn))) = true;
    elseif (n_drawn > n_test)
      candidates = find (is_test);
      is_test(candidates(randsample (n_drawn, n_drawn - n_test))) = false;
    endif
  endif

  Cnew.inds = is_test;

endfunction
statistics-1.3.0/inst/@cvpartition/set.m0000755000000000000000000000302112776476211016437 0ustar 00000000000000## Copyright (C) 2014 Nir Krakauer
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; If not, see .

## -*- texinfo -*-
## @deftypefn {Function File} {@var{s} =} set (@var{c}, @var{varargin})
## Set field(s) in a @samp{cvpartition} object.
##
## @var{varargin} must be one or more property/value pairs.  Each property
## must be a string naming one of the fields @samp{classes}, @samp{inds},
## @samp{n_classes}, @samp{NumObservations}, @samp{NumTestSets},
## @samp{TestSize}, @samp{TrainSize} or @samp{Type}.  The updated object is
## returned; an unknown or non-string property raises an error.
##
## @seealso{cvpartition}
## @end deftypefn

function s = set (c, varargin)

  s = struct (c);

  if (length (varargin) < 2 || rem (length (varargin), 2) != 0)
    error ("set: expecting property/value pairs");
  endif

  ## Apply the pairs in the order given.
  for idx = 1:2:numel (varargin)
    prop = varargin{idx};
    val = varargin{idx + 1};
    if (ischar (prop))
      switch (prop)
        case {"classes", "inds", "n_classes", "NumObservations", "NumTestSets", "TestSize", "TrainSize", "Type"}
          s = setfield (s, prop, val);
        otherwise
          ## Name the offending property; this message used to reference an
          ## undefined variable and so failed with an unrelated error.
          error ("set: invalid property %s", prop);
      endswitch
    else
      error ("set: expecting the property to be a string");
    endif
  endfor

  s = class (s, "cvpartition");

endfunction
statistics-1.3.0/inst/@cvpartition/test.m0000755000000000000000000000270312776476211016631 0ustar 00000000000000## Copyright (C) 2014 Nir Krakauer
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; If not, see .

## -*- texinfo -*-
## @deftypefn{Function File}{@var{inds} =} test (@var{C}, [@var{i}])
## Return logical vector for testing-subset indices from a cvpartition object.
##
## @var{C} should be a cvpartition object. @var{i} is the fold index (default is 1).
##
## @seealso{cvpartition, @@cvpartition/training}
## @end deftypefn

## Author: Nir Krakauer

function inds = test (C, i = [])

  if (nargin < 1 || nargin > 2)
    print_usage ();
  endif

  ## A missing or empty fold index selects the first fold.
  if (nargin < 2 || isempty (i))
    i = 1;
  endif

  kind = C.Type;
  if (any (strcmp (kind, {'kfold', 'given'})))
    ## Elements whose stored subset index equals the requested fold.
    inds = (C.inds == i);
  elseif (strcmp (kind, 'holdout'))
    ## The stored indices are used directly as the test mask.
    inds = C.inds;
  elseif (strcmp (kind, 'leaveout'))
    ## Only element i is tested.
    inds = false (C.NumObservations, 1);
    inds(i) = true;
  elseif (strcmp (kind, 'resubstitution'))
    ## Every element is in the test set.
    inds = true (C.NumObservations, 1);
  endif

endfunction
statistics-1.3.0/inst/@cvpartition/training.m0000755000000000000000000000271412776476211017467 0ustar 00000000000000## Copyright (C) 2014 Nir Krakauer
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; If not, see .

## -*- texinfo -*-
## @deftypefn{Function File}{@var{inds} =} training (@var{C}, [@var{i}])
## Return logical vector for training-subset indices from a cvpartition object.
##
## @var{C} should be a cvpartition object. @var{i} is the fold index (default is 1).
##
## @seealso{cvpartition, @@cvpartition/test}
## @end deftypefn

## Author: Nir Krakauer

function inds = training (C, i = [])

  if (nargin < 1 || nargin > 2)
    print_usage ();
  endif

  ## A missing or empty fold index selects the first fold.
  if (nargin < 2 || isempty (i))
    i = 1;
  endif

  kind = C.Type;
  if (any (strcmp (kind, {'kfold', 'given'})))
    ## Everything outside the requested fold is used for training.
    inds = (C.inds != i);
  elseif (strcmp (kind, 'holdout'))
    ## Complement of the stored test mask.
    inds = !C.inds;
  elseif (strcmp (kind, 'leaveout'))
    ## All elements except element i.
    inds = true (C.NumObservations, 1);
    inds(i) = false;
  elseif (strcmp (kind, 'resubstitution'))
    ## Every element is in the training set.
    inds = true (C.NumObservations, 1);
  endif

endfunction
statistics-1.3.0/inst/anderson_darling_cdf.m0000755000000000000000000000726612776476211017346 0ustar 00000000000000## Author: Paul Kienzle
## This program is granted to the public domain.

## -*- texinfo -*-
## @deftypefn {Function File} @var{p} = anderson_darling_cdf (@var{A}, @var{n})
##
## Return the CDF for the given Anderson-Darling coefficient @var{A}
## computed from @var{n} values sampled from a distribution. For a
## vector of random variables @var{x} of length @var{n}, compute the CDF
## of the values from the distribution from which they are drawn.
## You can use these values to compute @var{A} as follows:
##
## @example
## @var{A} = -@var{n} - sum( (2*i-1) .* (log(@var{x}) + log(1 - @var{x}(@var{n}:-1:1,:))) )/@var{n};
## @end example
##
## From the value @var{A}, @code{anderson_darling_cdf} returns the probability
## that @var{A} could be returned from a set of samples.
##
## The algorithm given in [1] claims to be an approximation for the
## Anderson-Darling CDF accurate to 6 decimal points.
##
## Demonstrate using:
##
## @example
## n = 300; reps = 10000;
## z = randn(n, reps);
## x = sort ((1 + erf (z/sqrt (2)))/2);
## i = [1:n]' * ones (1, size (x, 2));
## A = -n - sum ((2*i-1) .* (log (x) + log (1 - x (n:-1:1, :))))/n;
## p = anderson_darling_cdf (A, n);
## hist (100 * p, [1:100] - 0.5);
## @end example
##
## You will see that the histogram is basically flat, which is to
## say that the probabilities returned by the Anderson-Darling CDF
## are distributed uniformly.
## ## You can easily determine the extreme values of @var{p}: ## ## @example ## [junk, idx] = sort (p); ## @end example ## ## The histograms of various @var{p} aren't very informative: ## ## @example ## histfit (z (:, idx (1)), linspace (-3, 3, 15)); ## histfit (z (:, idx (end/2)), linspace (-3, 3, 15)); ## histfit (z (:, idx (end)), linspace (-3, 3, 15)); ## @end example ## ## More telling is the qqplot: ## ## @example ## qqplot (z (:, idx (1))); hold on; plot ([-3, 3], [-3, 3], ';;'); hold off; ## qqplot (z (:, idx (end/2))); hold on; plot ([-3, 3], [-3, 3], ';;'); hold off; ## qqplot (z (:, idx (end))); hold on; plot ([-3, 3], [-3, 3], ';;'); hold off; ## @end example ## ## Try a similar analysis for @var{z} uniform: ## ## @example ## z = rand (n, reps); x = sort(z); ## @end example ## ## and for @var{z} exponential: ## ## @example ## z = rande (n, reps); x = sort (1 - exp (-z)); ## @end example ## ## [1] Marsaglia, G; Marsaglia JCW; (2004) "Evaluating the Anderson Darling ## distribution", Journal of Statistical Software, 9(2). 
##
## @seealso{anderson_darling_test}
## @end deftypefn

## Entry point: evaluate the limiting (n -> infinity) distribution at z, then
## add the finite-sample correction for n observations.  z and n may be
## arrays of the same size, or either one may be a scalar.  Non-positive z
## gives 0 and NaN gives NaN.
function y = anderson_darling_cdf(z,n)
  y = ADinf(z);
  y += ADerrfix(y,n);
endfunction

## Limiting Anderson-Darling CDF, using one fitted expression for z < 2 and
## another for z >= 2.
function y = ADinf(z)
  y = zeros(size(z));

  ## The statistic is never negative, so the CDF is 0 at and below zero.
  ## Keeping those points out of the formula below avoids 0/0 at z == 0 and
  ## complex square roots for z < 0.  NaN is propagated instead of being
  ## silently reported as probability 0.
  y(isnan(z)) = NaN;

  idx = (z > 0 & z < 2);
  if any(idx(:))
    p = [.00168691, -.0116720, .0347962, -.0649821, .247105, 2.00012];
    z1 = z(idx);
    y(idx) = exp(-1.2337141./z1)./sqrt(z1).*polyval(p,z1);
  endif

  idx = (z >= 2);
  if any(idx(:))
    p = [-.0003146, +.008056, -.082433, +.43424, -2.30695, 1.0776];
    y(idx) = exp(-exp(polyval(p,z(idx))));
  endif
endfunction

## Finite-sample correction added to the limiting CDF value x for sample
## size n.  Three fitted pieces are used, selected by where x falls relative
## to c(n) = .01265 + .1757/n and to 0.8.
function y = ADerrfix(x,n)
  ## Expand a scalar argument so that both have the same size.
  if isscalar(n), n = n*ones(size(x));
  elseif isscalar(x), x = x*ones(size(n));
  endif
  y = zeros(size(x));
  c = .01265 + .1757./n;

  idx = (x >= 0.8);
  if any(idx(:))
    p = [255.7844, -1116.360, 1950.646, -1705.091, 745.2337, -130.2137];
    g3 = polyval(p,x(idx));
    y(idx) = g3./n(idx);
  endif

  idx = (x < 0.8 & x > c);
  if any(idx(:))
    p = [1.91864, -8.259, 14.458, -14.6538, 6.54034, -.00022633];
    n1 = 1./n(idx);
    c1 = c(idx);
    g2 = polyval(p,(x(idx)-c1)./(.8-c1));
    y(idx) = (.04213 + .01365*n1).*n1 .* g2;
  endif

  idx = (x <= c);
  if any(idx(:))
    x1 = x(idx)./c(idx);
    n1 = 1./n(idx);
    g1 = sqrt(x1).*(1-x1).*(49*x1-102);
    y(idx) = ((.0037*n1+.00078).*n1+.00006).*n1 .* g1;
  endif
endfunction
statistics-1.3.0/inst/anderson_darling_test.m0000755000000000000000000001235012776476211017557 0ustar 00000000000000## Author: Paul Kienzle
## This program is granted to the public domain.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{q}, @var{Asq}, @var{info}] = } = @
## anderson_darling_test (@var{x}, @var{distribution})
##
## Test the hypothesis that @var{x} is selected from the given distribution
## using the Anderson-Darling test. If the returned @var{q} is small, reject
## the hypothesis at the @var{q}*100% level.
function [q,Asq,info] = anderson_darling_test(x,dist)
  ## Anderson-Darling goodness-of-fit test.
  ##
  ## X     data; a row vector is treated as a single sample, otherwise
  ##       each column is tested independently.
  ## DIST  "normal", "exponential" or "uniform".
  ##
  ## Q     significance level looked up from the critical table, or
  ##       1 - anderson_darling_cdf (...) for "uniform".
  ## ASQ   unadjusted A^2 statistic (one value per column).
  ## INFO  struct with fields Asq, Asq_corrected, Asq_critical, p and
  ##       p_is_precise; only built when a third output is requested.
  ##
  ## Raises an error for any other DIST.

  if (size (x, 1) == 1)
    x = x(:);
  endif
  x = sort (x);
  n = size (x, 1);
  use_cdf = 0;

  ## Choose the small-sample adjustment and the critical values, and map
  ## the data to CDF values of the hypothesised distribution.
  switch (dist)
    case 'normal'
      ## This expression for adj is used in R.
      ## Note that the values from NIST dataplot don't work nearly as well.
      adj = 1 + (.75 + 2.25/n)/n;
      qvals = [ 0.1, 0.05, 0.025, 0.01 ];
      Acrit = [ 0.631, 0.752, 0.873, 1.035];
      x = stdnormal_cdf (zscore (x));

    case 'uniform'
      ## Put invalid data at the limits of the distribution.
      ## This will drive the statistic to infinity.
      x(x<0) = 0;
      x(x>1) = 1;
      adj = 1.;
      qvals = [ 0.1, 0.05, 0.025, 0.01 ];
      Acrit = [ 1.933, 2.492, 3.070, 3.857 ];
      use_cdf = 1;

    ## FIXME: Weibull support was only sketched and never worked, because
    ## the parameters (sigma, alpha) are not estimated anywhere.  The
    ## intended constants are kept here for a future implementation:
    ##   adj   = 1 + 0.2/sqrt(n);
    ##   qvals = [ 0.1, 0.05, 0.025, 0.01 ];
    ##   Acrit = [ 0.637, 0.757, 0.877, 1.038];
    ##   x = wblcdf (x, ones(n,1)*sigma, ones(n,1)*alpha);

    case 'exponential'
      adj = 1 + 0.6/n;
      qvals = [ 0.1, 0.05, 0.025, 0.01 ];
      ## Critical values depend on n.  Choose the appropriate critical set.
      ## These values come from NIST dataplot/src/dp8.f.
      Acritn = [
                  0, 1.022, 1.265, 1.515, 1.888
                 11, 1.045, 1.300, 1.556, 1.927
                 21, 1.062, 1.323, 1.582, 1.945
                 51, 1.070, 1.330, 1.595, 1.951
                101, 1.078, 1.341, 1.606, 1.957
               ];
      ## FIXME: consider interpolating in the critical value table.
      Acrit = Acritn(lookup (Acritn(:,1), n), 2:5);

      lambda = 1 ./ mean (x);  # exponential parameter estimation
      x = expcdf (x, 1 ./ (ones (n, 1) * lambda));

    otherwise
      ## FIXME consider implementing more of distributions; a number
      ## of them are defined in NIST dataplot/src/dp8.f.
      error ("Anderson-Darling test for %s not implemented", dist);
  endswitch

  ## x may be a matrix: flatten before any(), otherwise `if' would only
  ## fire when every column contains an out-of-range value.
  if (any (x(:) < 0 | x(:) > 1))
    error ('Anderson-Darling test requires data in CDF form');
  endif

  i = [1:n]' * ones (1, size (x, 2));
  Asq = -n - sum ((2*i - 1) .* (log (x) + log (1 - x(n:-1:1,:)))) / n;

  ## Lookup adjusted critical value in the cdf (if uniform) or in the
  ## critical table.
  if (use_cdf)
    q = 1 - anderson_darling_cdf (Asq*adj, n);
  else
    idx = lookup ([-Inf, Acrit], Asq*adj);
    q = [1, qvals](idx);
  endif

  if (nargout > 2)
    info.Asq = Asq;
    info.Asq_corrected = Asq*adj;
    info.Asq_critical = [100*(1 - qvals); Acrit]';
    info.p = 1 - q;
    info.p_is_precise = use_cdf;
  endif
endfunction
## ## Data is a single vector @var{data} with groups specified by ## a corresponding matrix of group labels @var{grps}, where @var{grps} ## has the same number of rows as @var{data}. For example, if ## @var{data} = [1.1;1.2]; @var{grps}= [1,2,1; 1,5,2]; ## then data point 1.1 was measured under conditions 1,2,1 and ## data point 1.2 was measured under conditions 1,5,2. ## Note that groups do not need to be sequentially numbered. ## ## By default, a 'linear' model is used, computing the N main effects ## with no interactions. This may be modified with the 'model' parameter: ## ## p= anovan(data,groups, 'model', modeltype) ## - modeltype = 'linear': compute N main effects ## - modeltype = 'interaction': compute N effects and ## N*(N-1)/2 two-factor interactions ## - modeltype = 'full': compute interactions at all levels ## ## Under the null of constant means, the statistic @var{f} follows an F ## distribution with @var{df_b} and @var{df_e} degrees of freedom. ## ## The p-value (1 minus the CDF of this distribution at @var{f}) is ## returned in @var{pval}. ## ## If no output argument is given, the standard one-way ANOVA table is ## printed. ## ## BUG: DFE is incorrect for modeltypes != full ## @end deftypefn ## Author: Andy Adler ## Based on code by: KH ## $Id$ ## ## TESTING RESULTS: ## 1. ANOVA ACCURACY: www.itl.nist.gov/div898/strd/anova/anova.html ## Passes 'easy' test. Comes close on 'Average'. Fails 'Higher'. ## This could be fixed with higher precision arithmetic ## 2. Matlab anova2 test ## www.mathworks.com/access/helpdesk/help/toolbox/stats/anova2.html ## % From web site: ## popcorn= [ 5.5 4.5 3.5; 5.5 4.5 4.0; 6.0 4.0 3.0; ## 6.5 5.0 4.0; 7.0 5.5 5.0; 7.0 5.0 4.5]; ## % Define groups so reps = 3 ## groups = [ 1 1;1 2;1 3;1 1;1 2;1 3;1 1;1 2;1 3; ## 2 1;2 2;2 3;2 1;2 2;2 3;2 1;2 2;2 3 ]; ## anovan( vec(popcorn'), groups, 'model', 'full') ## % Results same as Matlab output ## 3. 
function [PVAL, FSTAT, DF_B, DF_E] = anovan (data, grps, varargin)
  ## Multi-way analysis of variance.
  ##
  ## DATA   vector of observations.
  ## GRPS   matrix with one row per observation and one column per factor
  ##        ("way"); entries are numeric group labels.
  ## Optional 'model', TYPE pair with TYPE one of 'linear' (default, main
  ## effects only), 'interaction' (adds two-factor terms) or 'full'.
  ##
  ## Returns, per model term, the p-value, F statistic and between-groups
  ## degrees of freedom, plus the error degrees of freedom DF_E.  With no
  ## output argument the ANOVA table is printed instead.

  if (nargin <= 1)
    print_usage ();
  endif

  ## Optional arguments must come in name/value pairs; without this guard
  ## an odd count would index past the end of varargin.
  if (mod (numel (varargin), 2) != 0)
    error ("anovan: optional arguments must be given as 'name', value pairs");
  endif

  modeltype = 'linear';
  for idx = 1:2:numel (varargin)
    param = varargin{idx};
    value = varargin{idx+1};
    if (! ischar (param))
      error ("anovan: parameter names must be strings");
    elseif (strcmp (param, 'model'))
      modeltype = value;
    else
      error ('parameter %s is not supported', param);
    endif
  endfor

  if (! isvector (data))
    error ("anova: for `anova (data, grps)', data must be a vector");
  endif

  nd = size (grps, 1);   # number of data points
  nw = size (grps, 2);   # number of anova "ways"
  if (length (data) != nd)
    error ("anova: grps must be a matrix of the same number of rows as data");
  endif

  [g, grp_map] = relabel_groups (grps);
  if (strcmp (modeltype, 'linear'))
    max_interact = 1;
  elseif (strcmp (modeltype, 'interaction'))
    max_interact = 2;
  elseif (strcmp (modeltype, 'full'))
    ## Interactions of every order: at most one per way.
    max_interact = nw;
  else
    error ('modeltype %s is not supported', modeltype);
  endif

  ng = length (grp_map);
  int_tbl = interact_tbl (nw, ng, max_interact);
  [gn, gs, gss] = raw_sums (data, g, ng, int_tbl);

  ## Row 1 of int_tbl is the grand total; the remaining rows are the terms.
  stats_tbl = int_tbl(2:size (int_tbl, 1),:) > 0;
  nstats = size (stats_tbl, 1);
  stats = zeros (nstats+1, 5);   # SS, DF, MS, F, p
  for i = 1:nstats
    [SS, DF, MS] = factor_sums (gn, gs, gss, stats_tbl(i,:), ng, nw);
    stats(i,1:3) = [SS, DF, MS];
  endfor

  ## Residual sum of squares is what the model terms leave unexplained.
  SST = gss(1) - gs(1)^2 / gn(1);
  SSE = SST - sum (stats(:,1));

  if (strcmp (modeltype, 'full'))
    ## Within-cell degrees of freedom, counted over the non-empty cells.
    sel = select_pat (ones (1, nw), ng, nw);
    DFE = sum ((gn(sel) - 1) .* (gn(sel) > 0));
  else
    ## Full-cell counts are not accumulated for reduced models, so use
    ## total DF minus the DF consumed by the fitted terms.
    DFE = (nd - 1) - sum (stats(1:nstats,2));
  endif
  MSE = SSE / DFE;
  stats(nstats+1,1:3) = [SSE, DFE, MSE];

  for i = 1:nstats
    MS = stats(i,3);
    DF = stats(i,2);
    F = MS / MSE;
    pval = 1 - fcdf (F, DF, DFE);
    stats(i,4:5) = [F, pval];
  endfor

  if (nargout == 0)
    printout (stats, stats_tbl);
  else
    PVAL  = stats(1:nstats,5);
    FSTAT = stats(1:nstats,4);
    DF_B  = stats(1:nstats,2);
    DF_E  = DFE;
  endif
endfunction

# relabel groups to a mapping from 1 to ng
# Input
#   grps     input grouping
# Output
#   g        relabelled grouping
#   grp_map  map from output to input grouping
function [g, grp_map] = relabel_groups (grps)
  grp_vec = vec (grps);
  s = sort (grp_vec);
  uniq = 1 + [0; find(diff (s))];
  # mapping from new grps to old groups
  grp_map = s(uniq);
  # create new group
  ngroups = length (uniq);
  g = zeros (size (grp_vec));
  for i = 1:ngroups
    g(grp_vec == grp_map(i)) = i;
  endfor
  g = reshape (g, size (grps));
endfunction

# Create interaction table
#
# Input:
#    nw            number of "ways"
#    ng            number of ANOVA groups
#    max_interact  maximum number of interactions to consider
# Output:
#    int_tbl       one row per retained term; column i holds (ng+1)^(i-1)
#                  when way i takes part in the term and 0 otherwise
function int_tbl = interact_tbl (nw, ng, max_interact)
  combin = 2^nw;
  inter_tbl = zeros (combin, nw);
  idx = (0:combin-1)';
  for i = 1:nw
    inter_tbl(:,i) = (rem (idx, 2^i) >= 2^(i-1));
  endfor

  # drop terms involving more than max_interact ways
  too_many = (sum (inter_tbl, 2) > max_interact);
  inter_tbl(too_many,:) = [];
  combin = size (inter_tbl, 1);   # update value

  # scale inter_tbl: use ng+1 to map combinations of groups to integers
  int_tbl = inter_tbl .* (ones (combin, 1) * (ng+1).^(0:nw-1));
endfunction

# Calculate sums for each combination
#
# Input:
#    data     observations
#    g        relabelled grouping matrix
#    ng       number of ANOVA groups
#    int_tbl  interaction table from interact_tbl
#
# Output (vectors of length (ng+1)^nw, indexed by 1 + combination code):
#    gn       number of data points in each group
#    gs       sum of data in each group
#    gss      sum of squares of data in each group
function [gn, gs, gss] = raw_sums (data, g, ng, int_tbl)
  nw = size (g, 2);
  ndata = size (g, 1);
  gn = gs = gss = zeros ((ng+1)^nw, 1);
  for i = 1:ndata
    # need offset by one for indexing
    datapt = data(i);
    idx = 1 + int_tbl * g(i,:)';
    gn(idx)  += 1;
    gs(idx)  += datapt;
    gss(idx) += datapt^2;
  endfor
endfunction

# Calculate the raw (uncorrected) sum for one combination of ways
# Input:
#    gn      number of data points in each group
#    gs      sum of data in each group
#    gss     sum of squares of data in each group (unused here)
#    select  binary vector marking the ways in this term
#    ng      number of ANOVA groups
#    nw      number of ways
function [SS, DF] = raw_factor_sums (gn, gs, gss, select, ng, nw)
  sel = select_pat (select, ng, nw);
  ss_raw = gs(sel).^2 ./ gn(sel);
  # empty cells give 0/0 = NaN and are skipped
  SS = sum (ss_raw(! isnan (ss_raw)));
  if (length (find (select > 0)) == 1)
    DF = sum (gn(sel) > 0) - 1;
  else
    DF = 1;   # this isn't the real DF, but needed to multiply
  endif
endfunction

# Sum of squares, degrees of freedom and mean square for one model term,
# by inclusion/exclusion over every subset of the ways in the term.
function [SS, DF, MS] = factor_sums (gn, gs, gss, select, ng, nw)
  SS = 0;
  DF = 1;

  ff = find (select);
  lff = length (ff);
  # zero terms added, one term subtracted, two added, etc
  for i = 0:2^lff-1
    remove = find (rem (floor (i * 2.^(-lff+1:0)), 2));
    sel1 = select;
    if (! isempty (remove))
      sel1(ff(remove)) = 0;
    endif
    [raw_sum, raw_df] = raw_factor_sums (gn, gs, gss, sel1, ng, nw);

    add_sub = (-1)^length (remove);
    SS += add_sub * raw_sum;
    DF *= raw_df;
  endfor

  MS = SS / DF;
endfunction

# Indices into the raw-sum vectors whose combination code has a non-zero
# digit for exactly the ways marked in SELECT.
# Input:
#    select  binary vector marking the ways in this term
#    ng      number of ANOVA groups
#    nw      number of ways
function sel = select_pat (select, ng, nw)
  if (length (select) != nw)
    error ("length of select must be = nw");
  endif
  ng1 = ng + 1;

  # Expand 0:(ng+1)^nw-1 in base ng+1.  Column w holds the digit with
  # weight (ng+1)^(w-1), the same digit position interact_tbl gives to
  # way w, so select(w) is compared against the matching way.
  code = (0:ng1^nw - 1)';
  occupied = false (numel (code), nw);
  for w = 1:nw
    occupied(:,w) = rem (floor (code / ng1^(w-1)), ng1) > 0;
  endfor

  wanted = ones (numel (code), 1) * (select(:)' != 0);
  sel = find (all (occupied == wanted, 2));
endfunction

# Print the ANOVA table: the error row first, then the terms ordered by
# the number of ways they involve.
function printout (stats, stats_tbl)
  nw = size (stats_tbl, 2);
  [jnk, order] = sort (sum (stats_tbl, 2));

  printf ('\n%d-way ANOVA Table (Factors A%s):\n\n', nw, ...
          sprintf (',%c', double ('A') + (1:nw-1)));
  printf ('Source of Variation Sum Sqr df MeanSS Fval p-value\n');
  printf ('*********************************************************************\n');
  printf ('Error %10.2f %4d %10.2f\n', stats(size (stats, 1),1:3));

  for i = order(:)'
    str = sprintf (' %c x', double ('A') + find (stats_tbl(i,:) > 0) - 1);
    str = str(1:length (str)-2);   # remove x
    printf ('Factor %15s %10.2f %4d %10.2f %7.3f %7.6f\n', ...
            str, stats(i,:));
  endfor
  printf ('\n');
endfunction
function cdf = bbscdf (x, location, scale, shape)
  ## CDF of the Birnbaum-Saunders distribution, evaluated elementwise.
  ## X, LOCATION, SCALE and SHAPE must be scalars or of a common size.
  ## The result is NaN where X is NaN or a parameter is invalid, and 0
  ## for X <= LOCATION.

  if (nargin != 4)
    print_usage ();
  endif

  all_scalar = isscalar (location) && isscalar (scale) && isscalar (shape);
  if (! all_scalar)
    [err, x, location, scale, shape] = ...
        common_size (x, location, scale, shape);
    if (err > 0)
      error ("bbscdf: X, LOCATION, SCALE and SHAPE must be of common size or scalars");
    endif
  endif

  if (iscomplex (x) || iscomplex (location) || iscomplex (scale) ...
      || iscomplex (shape))
    error ("bbscdf: X, LOCATION, SCALE and SHAPE must not be complex");
  endif

  ## Any single-precision input makes the result single.
  want_single = isa (x, "single") || isa (location, "single") ...
                || isa (scale, "single") || isa (shape, "single");
  if (want_single)
    cdf = zeros (size (x), "single");
  else
    cdf = zeros (size (x));
  endif

  ## Finite location, strictly positive finite scale and shape.
  params_ok = (-Inf < location) & (location < Inf) ...
              & (0 < scale) & (scale < Inf) & (0 < shape) & (shape < Inf);

  undefined = isnan (x) | ! params_ok;
  cdf(undefined) = NaN;

  ## Points strictly above the location (including +Inf).
  in_support = (x > location) & (x <= Inf) & params_ok;
  if (isscalar (location) && isscalar (scale) && isscalar (shape))
    shifted = x(in_support) - location;
    root = sqrt (shifted ./ scale);
    cdf(in_support) = normcdf ((root - root.^-1) / shape);
  else
    shifted = x(in_support) - location(in_support);
    root = sqrt (shifted ./ scale(in_support));
    cdf(in_support) = normcdf ((root - root.^-1) ./ shape(in_support));
  endif
endfunction
function inv = bbsinv (x, location, scale, shape)
  ## Quantile function (inverse CDF) of the Birnbaum-Saunders
  ## distribution, evaluated elementwise.
  ##
  ## X         probabilities.
  ## LOCATION  finite location (lower bound of the support).
  ## SCALE     finite, strictly positive scale.
  ## SHAPE     finite, strictly positive shape.
  ## All arguments must be scalars or of a common size.
  ##
  ## Returns NaN where X is NaN or greater than 1, or where a parameter
  ## is invalid; LOCATION where X <= 0; Inf where X == 1.

  if (nargin != 4)
    print_usage ();
  endif

  if (! isscalar (location) || ! isscalar (scale) || ! isscalar (shape))
    [retval, x, location, scale, shape] = ...
        common_size (x, location, scale, shape);
    if (retval > 0)
      error ("bbsinv: X, LOCATION, SCALE and SHAPE must be of common size or scalars");
    endif
  endif

  if (iscomplex (x) || iscomplex (location) ...
      || iscomplex (scale) || iscomplex (shape))
    error ("bbsinv: X, LOCATION, SCALE and SHAPE must not be complex");
  endif

  if (isa (x, "single") || isa (location, "single") ...
      || isa (scale, "single") || isa (shape, "single"))
    inv = zeros (size (x), "single");
  else
    inv = zeros (size (x));
  endif

  ## After common_size the parameters are either all scalar or all of the
  ## size of x.
  all_scalar = isscalar (location) && isscalar (scale) && isscalar (shape);

  ## One validity mask shared by every branch, so that an infinite
  ## location is rejected consistently.
  valid = (-Inf < location) & (location < Inf) ...
          & (scale > 0) & (scale < Inf) & (shape > 0) & (shape < Inf);

  k = isnan (x) | (x < 0) | (x > 1) | ! valid;
  inv(k) = NaN;

  ## Lower end: the quantile is the lower bound of the support, i.e. the
  ## location, not a literal 0.  Negative probabilities are mapped here
  ## too, as the original code did.
  k = (x <= 0) & valid;
  if (all_scalar)
    inv(k) = location;
  else
    inv(k) = location(k);
  endif

  k = (x == 1) & valid;
  inv(k) = Inf;

  k = (0 < x) & (x < 1) & valid;
  if (all_scalar)
    y = shape * norminv (x(k));
    inv(k) = location + scale * (y + sqrt (4 + y.^2)).^2 / 4;
  else
    y = shape(k) .* norminv (x(k));
    inv(k) = location(k) + scale(k) .* (y + sqrt (4 + y.^2)).^2 ./ 4;
  endif
endfunction
function pdf = bbspdf (x, location, scale, shape)
  ## PDF of the Birnbaum-Saunders distribution, evaluated elementwise.
  ## X, LOCATION, SCALE and SHAPE must be scalars or of a common size.
  ## The result is NaN where X is NaN or a parameter is invalid, and 0
  ## outside LOCATION < X < Inf.

  if (nargin != 4)
    print_usage ();
  endif

  all_scalar = isscalar (location) && isscalar (scale) && isscalar (shape);
  if (! all_scalar)
    [err, x, location, scale, shape] = ...
        common_size (x, location, scale, shape);
    if (err > 0)
      error ("bbspdf: X, LOCATION, SCALE and SHAPE must be of common size or scalars");
    endif
  endif

  if (iscomplex (x) || iscomplex (location) ...
      || iscomplex (scale) || iscomplex (shape))
    error ("bbspdf: X, LOCATION, SCALE and SHAPE must not be complex");
  endif

  ## Any single-precision input makes the result single.
  want_single = isa (x, "single") || isa (location, "single") ...
                || isa (scale, "single") || isa (shape, "single");
  if (want_single)
    pdf = zeros (size (x), "single");
  else
    pdf = zeros (size (x));
  endif

  ## Finite location, strictly positive finite scale and shape.
  params_ok = (-Inf < location) & (location < Inf) ...
              & (0 < scale) & (scale < Inf) & (0 < shape) & (shape < Inf);

  undefined = isnan (x) | ! params_ok;
  pdf(undefined) = NaN;

  ## Finite points strictly above the location.
  in_support = (x > location) & (x < Inf) & params_ok;
  if (isscalar (location) && isscalar (scale) && isscalar (shape))
    shifted = x(in_support) - location;
    root = sqrt (shifted ./ scale);
    pdf(in_support) = ((root + root.^-1) ./ (2 * shape * shifted)) ...
                      .* normpdf ((root - root.^-1) / shape);
  else
    shifted = x(in_support) - location(in_support);
    root = sqrt (shifted ./ scale(in_support));
    pdf(in_support) = ((root + root.^-1) ./ (2 * shape(in_support).* shifted)) ...
                      .* normpdf ((root - root.^-1) ./ shape(in_support));
  endif
endfunction
statistics-1.3.0/inst/bbsrnd.m0000644000000000000000000001235312776476211014461 0ustar 00000000000000## Copyright (C) 2016 Dag Lyberg ## Copyright (C) 1995-2015 Kurt Hornik ## ## This file is part of Octave. ## ## Octave is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or (at ## your option) any later version. ## ## Octave is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Octave; see the file COPYING. If not, see ## . ## -*- texinfo -*- ## @deftypefn {} {} bbsrnd (@var{location}, @var{scale}, @var{shape}) ## @deftypefnx {} {} bbsrnd (@var{location}, @var{scale}, @var{shape}, @var{r}) ## @deftypefnx {} {} bbsrnd (@var{location}, @var{scale}, @var{shape}, @var{r}, @var{c}, @dots{}) ## @deftypefnx {} {} bbsrnd (@var{location}, @var{scale}, @var{shape}, [@var{sz}]) ## Return a matrix of random samples from the Birnbaum-Saunders distribution ## with parameters @var{location}, @var{scale} and @var{shape}. ## ## When called with a single size argument, return a square matrix with ## the dimension specified. When called with more than one scalar argument the ## first two arguments are taken as the number of rows and columns and any ## further arguments specify additional matrix dimensions. The size may also ## be specified with a vector of dimensions @var{sz}. ## ## If no size arguments are given then the result matrix is the common size of ## @var{location}, @var{scale} and @var{shape}. 
function rnd = bbsrnd (location, scale, shape, varargin)
  ## Random samples from the Birnbaum-Saunders distribution.
  ## The output size is the common size of the parameters, or the size
  ## given by the trailing arguments (one scalar -> square matrix, one row
  ## vector -> that size, several scalars -> those dimensions).
  ## Elements whose parameters are invalid are NaN.

  if (nargin < 3)
    print_usage ();
  endif

  all_scalar = isscalar (location) && isscalar (scale) && isscalar (shape);
  if (! all_scalar)
    [err, location, scale, shape] = common_size (location, scale, shape);
    if (err > 0)
      error ("bbsrnd: LOCATION, SCALE and SHAPE must be of common size or scalars");
    endif
  endif

  if (iscomplex (location) || iscomplex (scale) || iscomplex (shape))
    error ("bbsrnd: LOCATION, SCALE and SHAPE must not be complex");
  endif

  ## Work out the requested output size.
  switch (nargin)
    case 3
      sz = size (location);
    case 4
      dims = varargin{1};
      if (isscalar (dims) && dims >= 0)
        sz = [dims, dims];
      elseif (isrow (dims) && all (dims >= 0))
        sz = dims;
      else
        error ("bbsrnd: dimension vector must be row vector of non-negative integers");
      endif
    otherwise
      for d = 1:numel (varargin)
        if (! isscalar (varargin{d}) || varargin{d} < 0)
          error ("bbsrnd: dimensions must be non-negative integers");
        endif
      endfor
      sz = [varargin{:}];
  endswitch

  if (! isscalar (location) && ! isequal (size (location), sz))
    error ("bbsrnd: LOCATION, SCALE and SHAPE must be scalar or of size SZ");
  endif

  ## Any single-precision parameter makes the result single.
  cls = "double";
  if (isa (location, "single") || isa (scale, "single") || isa (shape, "single"))
    cls = "single";
  endif

  if (isscalar (location) && isscalar (scale) && isscalar (shape))
    params_ok = (-Inf < location) && (location < Inf) ...
                && (0 < scale) && (scale < Inf) ...
                && (0 < shape) && (shape < Inf);
    if (params_ok)
      ## Transform uniform draws through the normal quantile.
      rnd = rand (sz, cls);
      y = shape * norminv (rnd);
      rnd = location + scale * (y + sqrt (4 + y.^2)).^2 / 4;
    else
      rnd = NaN (sz, cls);
    endif
  else
    rnd = NaN (sz, cls);

    ok = (-Inf < location) & (location < Inf) ...
         & (0 < scale) & (scale < Inf) ...
         & (0 < shape) & (shape < Inf);
    ## The uniform draws are stored in rnd first and read back from it.
    rnd(ok) = rand (sum (ok(:)), 1);
    y = shape(ok) .* norminv (rnd(ok));
    rnd(ok) = location(ok) + scale(ok) .* (y + sqrt (4 + y.^2)).^2 / 4;
  endif
endfunction
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{m}, @var{v}] =} betastat (@var{a}, @var{b})
## Compute mean and variance of the beta distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{a} is the first parameter of the beta distribution. @var{a} must be
## positive
##
## @item
## @var{b} is the second parameter of the beta distribution. @var{b} must be
## positive
## @end itemize
## @var{a} and @var{b} must be of common size or one of them must be scalar
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{m} is the mean of the beta distribution
##
## @item
## @var{v} is the variance of the beta distribution
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## a = 1:6;
## b = 1:0.2:2;
## [m, v] = betastat (a, b)
## @end group
##
## @group
## [m, v] = betastat (a, 1.5)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Moments of the beta distribution

function [m, v] = betastat (a, b)

  if (nargin != 2)
    print_usage ();
  endif

  ## Sizes are compatible when either argument is scalar or both match
  if (! (isscalar (a) || isscalar (b) || size_equal (a, b)))
    error ("betastat: a and b must be of common size or scalar");
  endif

  ## Entries whose parameters are not both positive.  NaN compares false,
  ## so NaN parameters are flagged as well.
  invalid = ! (a > 0 & b > 0);

  ## Mean: a / (a + b)
  total = a + b;
  m = a ./ (total);
  m(invalid) = NaN;

  ## Variance: a*b / ((a + b)^2 * (a + b + 1)), only when requested
  if (nargout > 1)
    v = (a .* b) ./ ((total .^ 2) .* (total + 1));
    v(invalid) = NaN;
  endif

endfunction

%!test
%! a = -2:6;
%! b = 0.4:0.2:2;
%! [m, v] = betastat (a, b);
%! expected_m = [NaN NaN NaN 1/2 2/3.2 3/4.4 4/5.6 5/6.8 6/8];
%! expected_v = [NaN NaN NaN 0.0833, 0.0558, 0.0402, 0.0309, 0.0250, 0.0208];
%! assert (m, expected_m, eps*100);
%! assert (v, expected_v, 0.001);

%!test
%! a = -2:1:6;
%! [m, v] = betastat (a, 1.5);
%! expected_m = [NaN NaN NaN 1/2.5 2/3.5 3/4.5 4/5.5 5/6.5 6/7.5];
%! expected_v = [NaN NaN NaN 0.0686, 0.0544, 0.0404, 0.0305, 0.0237, 0.0188];
%! assert (m, expected_m);
%! assert (v, expected_v, 0.001);

%!test
%! a = [14 Inf 10 NaN 10];
%! b = [12 9 NaN Inf 12];
%! [m, v] = betastat (a, b);
%! expected_m = [14/26 NaN NaN NaN 10/22];
%! expected_v = [168/18252 NaN NaN NaN 120/11132];
%! assert (m, expected_m);
%! assert (v, expected_v);

%!assert (nthargout (1:2, @betastat, 5, []), {[], []})
%!assert (nthargout (1:2, @betastat, [], 5), {[], []})
%!assert (nthargout (1:2, @betastat, "", 5), {[], []})
%!assert (nthargout (1:2, @betastat, true, 5), {1/6, 5/252})
%!assert (size (betastat (rand (10, 5, 4), rand (10, 5, 4))), [10 5 4])
%!assert (size (betastat (rand (10, 5, 4), 7)), [10 5 4])
statistics-1.3.0/inst/binostat.m0000755000000000000000000000726612776476211015044 0ustar 00000000000000
## Copyright (C) 2006, 2007 Arno Onken
## Copyright (C) 2015 Carnë Draug
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{m}, @var{v}] =} binostat (@var{n}, @var{p})
## Compute mean and variance of the binomial distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{n} is the first parameter of the binomial distribution. The elements
## of @var{n} must be natural numbers
##
## @item
## @var{p} is the second parameter of the binomial distribution. The
## elements of @var{p} must be probabilities
## @end itemize
## @var{n} and @var{p} must be of common size or one of them must be scalar
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{m} is the mean of the binomial distribution
##
## @item
## @var{v} is the variance of the binomial distribution
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## n = 1:6;
## p = 0:0.2:1;
## [m, v] = binostat (n, p)
## @end group
##
## @group
## [m, v] = binostat (n, 0.5)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Moments of the binomial distribution

function [m, v] = binostat (n, p)

  if (nargin != 2)
    print_usage ();
  endif

  ## Sizes are compatible when either argument is scalar or both match
  if (! (isscalar (n) || isscalar (p) || size_equal (n, p)))
    error ("binostat: N and P must be of common size or scalar");
  endif

  ## Valid entries have a positive integer N and a probability P in [0, 1];
  ## everything else (including NaN, which compares false) yields NaN.
  is_count = (n > 0) & (fix (n) == n);
  is_prob = (p >= 0) & (p <= 1);
  invalid = ! (is_count & is_prob);

  ## Mean: n * p
  m = n .* p;
  m(invalid) = NaN;

  ## Variance: n * p * (1 - p), only when requested
  if (nargout > 1)
    v = m .* (1 - p);
    v(invalid) = NaN;
  endif

endfunction

%!test
%! n = 1:6;
%! p = 0:0.2:1;
%! [m, v] = binostat (n, p);
%!
expected_m = [0.00, 0.40, 1.20, 2.40, 4.00, 6.00]; %! expected_v = [0.00, 0.32, 0.72, 0.96, 0.80, 0.00]; %! assert (m, expected_m, 0.001); %! assert (v, expected_v, 0.001); %!test %! n = 1:6; %! [m, v] = binostat (n, 0.5); %! expected_m = [0.50, 1.00, 1.50, 2.00, 2.50, 3.00]; %! expected_v = [0.25, 0.50, 0.75, 1.00, 1.25, 1.50]; %! assert (m, expected_m, 0.001); %! assert (v, expected_v, 0.001); %!test %! n = [-Inf -3 5 0.5 3 NaN 100, Inf]; %! [m, v] = binostat (n, 0.5); %! assert (isnan (m), [true true false true false true false false]) %! assert (isnan (v), [true true false true false true false false]) %! assert (m(end), Inf); %! assert (v(end), Inf); %!assert (nthargout (1:2, @binostat, 5, []), {[], []}) %!assert (nthargout (1:2, @binostat, [], 5), {[], []}) %!assert (nthargout (1:2, @binostat, "", 5), {[], []}) %!assert (nthargout (1:2, @binostat, true, 5), {NaN, NaN}) %!assert (nthargout (1:2, @binostat, 5, true), {5, 0}) %!assert (size (binostat (randi (100, 10, 5, 4), rand (10, 5, 4))), [10 5 4]) %!assert (size (binostat (randi (100, 10, 5, 4), 7)), [10 5 4]) statistics-1.3.0/inst/binotest.m0000644000000000000000000001225612776476211015040 0ustar 00000000000000## Copyright (C) 2016 Andreas Stahel ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . 
## -*- texinfo -*-
## @deftypefn {Function File} {[@var{h}, @var{pval}, @var{ci}] =} binotest (@var{pos},@var{N},@var{p0})
## @deftypefnx {Function File} {[@var{h}, @var{pval}, @var{ci}] =} binotest (@var{pos},@var{N},@var{p0},@var{Name},@var{Value})
## Test for probability @var{p} of a binomial sample
##
## Perform a test of the null hypothesis @var{p} == @var{p0} for a sample
## of size @var{N} with @var{pos} positive results
##
##
## Name-Value pair arguments can be used to set various options.
## @qcode{"alpha"} can be used to specify the significance level
## of the test (the default value is 0.05). The option @qcode{"tail"},
## can be used to select the desired alternative hypothesis. If the
## value is @qcode{"both"} (default) the null is tested against the two-sided
## alternative @code{@var{p} != @var{p0}}. The value of @var{pval} is
## determined by adding the probabilities of all events less than or equally
## likely as the observed number @var{pos} of positive events.
## If the value of @qcode{"tail"} is @qcode{"right"}
## the one-sided alternative @code{@var{p} > @var{p0}} is considered.
## Similarly for @qcode{"left"}, the one-sided alternative
## @code{@var{p} < @var{p0}} is considered.
##
## If @var{h} is 0 the null hypothesis is accepted, if it is 1 the null
## hypothesis is rejected. The p-value of the test is returned in @var{pval}.
## A 100(1-alpha)% confidence interval is returned in @var{ci}.
##
## @end deftypefn

## Author: Andreas Stahel

## Binomial test of the null hypothesis p == p0.
##
## Inputs:
##   pos       number of positive results (0 <= pos <= n)
##   n         sample size (n > 0)
##   p0        hypothesised success probability (0 <= p0 <= 1)
##   varargin  Name/Value pairs: 'alpha' (significance level, default 0.05)
##             and 'tail' ('both' (default), 'left' or 'right')
##
## Outputs:
##   h   1 if the p-value is below alpha, 0 otherwise (returned as double)
##   p   p-value of the test
##   ci  two-element confidence interval for the success probability
##
## Errors are raised for unknown option names, an option name without a
## value, a non-string tail, and out-of-range n, p0 or pos.
function [h, p, ci] = binotest (pos, n, p0, varargin)

  if (nargin < 3)
    print_usage ();
  endif

  ## Default option values
  alpha = 0.05;
  tail = 'both';

  ## Parse the Name/Value pairs
  i = 1;
  while (i <= length (varargin))
    name = lower (varargin{i});
    if (! any (strcmp (name, {'alpha', 'tail'})))
      error ('Invalid Name argument.', []);
    endif
    ## A trailing Name without a Value would otherwise fail with an
    ## out-of-bound index error that does not mention the option.
    if (i == length (varargin))
      error ('binotest: no value given for option ''%s''\n', name);
    endif
    if (strcmp (name, 'alpha'))
      alpha = varargin{i + 1};
    else
      tail = varargin{i + 1};
    endif
    i = i + 2;
  endwhile

  ## Validate the inputs
  if (! ischar (tail))
    error ('binotest: tail argument must be a string\n', []);
  endif
  if (n <= 0)
    error ('binotest: required n>0\n', []);
  endif
  if ((p0 < 0) || (p0 > 1))
    error ('binotest: required 0<= p0 <= 1\n', []);
  endif
  if ((pos < 0) || (pos > n))
    error ('binotest: required 0<= pos <= n\n', []);
  endif

  ## Based on the "tail" argument determine the p-value and the confidence
  ## interval.  The interval bounds solve binocdf equations with fzero on
  ## [0, 1]; when pos sits at 0 or n the corresponding bound is set directly.
  ##
  ## NOTE(review): 'left' returns the upper-tail probability P(X >= pos)
  ## together with an upper confidence bound, and 'right' returns the
  ## lower-tail probability P(X <= pos) together with a lower bound.  That
  ## pairing is the opposite of what the help text describes, but the
  ## bundled test expects the current 'left' p-value, so the behaviour is
  ## deliberately left unchanged here -- confirm the intended convention.
  switch (lower (tail))
    case 'both'
      ## Sum the probabilities of all outcomes that are no more likely
      ## than the observed one
      p_pos = binopdf (pos, n, p0);
      p_all = binopdf ([0:n], n, p0);
      p = sum (p_all(p_all <= p_pos));
      if (pos == 0)
        p_low = 0;
      else
        p_low = fzero (@(pl) 1 - binocdf (pos - 1, n, pl) - alpha / 2, [0 1]);
      endif
      if (pos == n)
        p_high = 1;
      else
        p_high = fzero (@(ph) binocdf (pos, n, ph) - alpha / 2, [0, 1]);
      endif
      ci = [p_low, p_high];
    case 'left'
      p = 1 - binocdf (pos - 1, n, p0);
      if (pos == n)
        p_high = 1;
      else
        p_high = fzero (@(ph) binocdf (pos, n, ph) - alpha, [0, 1]);
      endif
      ci = [0, p_high];
    case 'right'
      p = binocdf (pos, n, p0);
      if (pos == 0)
        p_low = 0;
      else
        p_low = fzero (@(pl) 1 - binocdf (pos - 1, n, pl) - alpha, [0 1]);
      endif
      ci = [p_low 1];
    otherwise
      error ('binotest: tail must be ''both'', ''left'' or ''right''\n', []);
  endswitch

  ## Determine the test outcome
  ## MATLAB returns this a double instead of a logical array
  h = double (p < alpha);

endfunction

%!demo
%! % flip a coin 1000 times, showing 475 heads
%! % Hypothesis: coin is fair, i.e. p=1/2
%! [h,p_val,ci] = binotest(475,1000,0.5)
%! % Result: h = 0 : null hypothesis not rejected, coin could be fair
%! % P value 0.12, i.e. hypothesis not rejected for alpha up to 12%
%! % 0.444 <= p <= 0.506 with 95% confidence

%!demo
%!
% flip a coin 100 times, showing 65 heads %! % Hypothesis: coin shows less than 50% heads, i.e. p<=1/2 %! [h,p_val,ci] = binotest(65,100,0.5,'tail','left','alpha',0.01) %! % Result: h = 1 : null hypothesis is rejected, i.e. coin shows more heads than tails %! % P value 0.0018, i.e. hypothesis not rejected for alpha up to 0.18% %! % 0 <= p <= 0.76 with 99% confidence %!test #example from https://en.wikipedia.org/wiki/Binomial_test %! [h,p_val,ci] = binotest (51,235,1/6); %! assert (p_val, 0.0437, 0.00005) %! [h,p_val,ci] = binotest (51,235,1/6,'tail','left'); %! assert (p_val, 0.027, 0.0005) statistics-1.3.0/inst/boxplot.m0000755000000000000000000002521412776476211014701 0ustar 00000000000000## Copyright (C) 2002 Alberto Terruzzi ## Copyright (C) 2006 Alberto Pose ## Copyright (C) 2011 Pascal Dupuis ## Copyright (C) 2012 Juan Pablo Carbajal ## Copyright (C) 2016 Pascal Dupuis ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{s} =} boxplot (@var{data}, @var{notched}, @ ## @var{symbol}, @var{vertical}, @var{maxwhisker}, @dots{}) ## @deftypefnx {Function File} {@var{s} =} boxplot (@var{data}, @var{group}) ## @deftypefnx {Function File} {[@dots{} @var{h}]=} boxplot (@dots{}) ## ## Produce a box plot. 
## ## The box plot is a graphical display that simultaneously describes several ## important features of a data set, such as center, spread, departure from ## symmetry, and identification of observations that lie unusually far from ## the bulk of the data. ## ## @var{data} is a matrix with one column for each data set, or data is a cell ## vector with one cell for each data set. ## ## @var{notched} = 1 produces a notched-box plot. Notches represent a robust ## estimate of the uncertainty about the median. ## ## @var{notched} = 0 (default) produces a rectangular box plot. ## ## @var{notched} in (0,1) produces a notch of the specified depth. ## notched values outside (0,1) are amusing if not exactly practical. ## ## @var{symbol} sets the symbol for the outlier values, default symbol for ## points that lie outside 3 times the interquartile range is 'o', ## default symbol for points between 1.5 and 3 times the interquartile ## range is '+'. ## ## @var{symbol} = '.' points between 1.5 and 3 times the IQR is marked with ## '.' and points outside 3 times IQR with 'o'. ## ## @var{symbol} = ['x','*'] points between 1.5 and 3 times the IQR is marked with ## 'x' and points outside 3 times IQR with '*'. ## ## @var{vertical} = 0 makes the boxes horizontal, by default @var{vertical} = 1. ## ## @var{maxwhisker} defines the length of the whiskers as a function of the IQR ## (default = 1.5). If @var{maxwhisker} = 0 then @code{boxplot} displays all data ## values outside the box using the plotting symbol for points that lie ## outside 3 times the IQR. ## ## Supplemental arguments are concatenated and passed to plot. 
##
## The returned matrix @var{s} has one column for each data set as follows:
##
## @multitable @columnfractions .1 .8
## @item 1 @tab Minimum
## @item 2 @tab 1st quartile
## @item 3 @tab 2nd quartile (median)
## @item 4 @tab 3rd quartile
## @item 5 @tab Maximum
## @item 6 @tab Lower confidence limit for median
## @item 7 @tab Upper confidence limit for median
## @end multitable
##
## The returned structure @var{h} has handles to the plot elements, allowing
## customization of the visualization using set/get functions.
##
## Example
##
## @example
## title ("Grade 3 heights");
## axis ([0,3]);
## set(gca (), "xtick", [1 2], "xticklabel", @{"girls", "boys"@});
## boxplot (@{randn(10,1)*5+140, randn(13,1)*8+135@});
## @end example
##
## @end deftypefn

function [s hs] = boxplot (data, varargin)

  ## assign parameter defaults
  if (nargin < 1)
    print_usage;
  endif

  %# default values
  maxwhisker = 1.5;
  vertical = 1;
  symbol = ['+', 'o'];
  notched = 0;
  plot_opts = {};
  groups = [];

  %# Optional arguments analysis
  %# numarg counts the optional arguments still to be consumed; indopt is
  %# the index of the next entry of varargin.  Because of the post-increment
  %# below, inside the loop body indopt equals the position of the current
  %# argument in the full argument list (data being position 1).
  numarg = nargin - 1;
  option_args = ['Notch'; 'Symbol'; 'Vertical'; 'Maxwhisker'];
  indopt = 1;
  while (numarg)
    dummy = varargin{indopt++};
    if (!ischar (dummy))
      %# MatLAB allows passing the second argument as a grouping vector
      if (length (dummy) > 1)
        if (2 ~= indopt)
          error ('Boxplot.m: grouping vector may only be passed as second arg');
        endif
        groups = dummy;
      else
        %# old way: positional argument
        %# (position 3, the symbol, is a string and is handled in the
        %# character branch below)
        switch indopt
          case 2
            notched = dummy;
          case 4
            vertical = dummy;
          case 5
            maxwhisker = dummy;
          otherwise
            error("No positional argument allowed at position %d", --indopt);
        endswitch
      endif
      numarg--;
      continue;
    else
      %# a string of at most two characters in third position is the
      %# positional outlier symbol
      if (3 == indopt && length (dummy) <= 2)
        symbol = dummy;
        numarg--;
        continue;
      else
        %# otherwise the string is the name of a name/value pair; the value
        %# is the next entry of varargin
        tt = strmatch(dummy, option_args);
        switch (tt)
          case 1
            notched = varargin{indopt};
          case 2
            symbol = varargin{indopt};
          case 3
            vertical = varargin{indopt};
          case 4
            maxwhisker = varargin{indopt};
          otherwise
            %# take two args and append them to plot_opts
            plot_opts(1, end+1:end+2) = {dummy, varargin{indopt}};
        endswitch
      endif
      %# skip the value that was just consumed
      indopt++;
      numarg -= 2;
    endif
  endwhile

  ## a single symbol is used for both classes of outliers
  if (1 == length (symbol))
    symbol(2) = symbol(1);
  endif

  ## notched == 1 selects the default notch depth of 0.25
  if (1 == notched)
    notched = 0.25;
  endif
  ## a scales the box half-width at the median (see quartile_x and median_x)
  a = 1-notched;

  ## figure out how many data sets we have
  if (isempty (groups))
    if (iscell (data))
      nc = length (data);
    else
      if (isvector (data))
        data = data(:);
      endif
      nc = columns (data);
    endif
    groups = (1:nc);
  else
    if (~isvector (data))
      error ('Boxplot.m: with the formalism (data, group), both must be vectors');
    end
    ## split data into one cell per distinct group value; the group values
    ## themselves become the positions of the boxes
    nc = unique (groups);
    dummy = cell (1, length (nc));
    for indopt = (1:length (nc))
      dummy(indopt) = data(groups == nc(indopt));
    end
    data = dummy;
    groups = nc(:).';
    nc = length (nc);
  end

  ## compute statistics
  ## s will contain
  ##    1,5    min and max
  ##    2,3,4  1st, 2nd and 3rd quartile
  ##    6,7    lower and upper confidence intervals for median
  s = zeros (7, nc);
  box = zeros (1, nc);
  ## whisker columns 1..nc are the lower whiskers, nc+1..2*nc the upper ones
  whisker_x = ones (2,1)*[groups, groups];
  whisker_y = zeros (2, 2*nc);
  outliers_x = [];
  outliers_y = [];
  outliers2_x = [];
  outliers2_y = [];

  for indi = (1:nc)
    ## Get the next data set from the array or cell array
    if (iscell (data))
      col = data{indi}(:);
    else
      col = data(:, indi);
    endif
    ## Skip missing data
    col(isnan (col) | isna (col)) = [];
    ## Remember the data length
    nd = length (col);
    box(indi) = nd;
    if (nd > 1)
      ## min,max and quartiles
      s(1:5, indi) = statistics (col)(1:5);
      ## confidence interval for the median
      est = 1.57*(s(4, indi)-s(2, indi))/sqrt (nd);
      s(6, indi) = max ([s(3, indi)-est, s(2, indi)]);
      s(7, indi) = min ([s(3, indi)+est, s(4, indi)]);
      ## whiskers out to the last point within the desired inter-quartile range
      ## (despite its name, IQR here is maxwhisker times the inter-quartile range)
      IQR = maxwhisker*(s(4, indi)-s(2, indi));
      whisker_y(:, indi) = [min(col(col >= s(2, indi)-IQR)); s(2, indi)];
      whisker_y(:,nc+indi) = [max(col(col <= s(4, indi)+IQR)); s(4, indi)];
      ## outliers beyond 1 and 2 inter-quartile ranges
      outliers = col((col < s(2, indi)-IQR & col >= s(2, indi)-2*IQR) | (col > s(4, indi)+IQR & col <= s(4, indi)+2*IQR));
      outliers2 = col(col < s(2, indi)-2*IQR | col > s(4, indi)+2*IQR);
      outliers_x = [outliers_x; groups(indi)*ones(size(outliers))];
      outliers_y = [outliers_y; outliers];
      outliers2_x = [outliers2_x; groups(indi)*ones(size(outliers2))];
      outliers2_y = [outliers2_y; outliers2];
    elseif (1 == nd)
      ## all statistics collapse to the value of the point
      s(:, indi) = col;
      ## single point data sets are plotted as outliers.
      outliers_x = [outliers_x; groups(indi)];
      outliers_y = [outliers_y; col];
    else
      ## no statistics if no points
      s(:, indi) = NaN;
    end
  end

  ## Note which boxes don't have enough stats
  chop = find (box <= 1);

  ## Draw a box around the quartiles, with width proportional to the number of
  ## items in the box. Draw notches if desired.
  box *= 0.4/max (box);
  quartile_x = ones (11,1)*groups + [-a;-1;-1;1;1;a;1;1;-1;-1;-a]*box;
  quartile_y = s([3,7,4,4,7,3,6,2,2,6,3],:);

  ## Draw a line through the median
  median_x = ones (2,1)*groups + [-a;+a]*box;
  median_y = s([3,3],:);

  ## Chop all boxes which don't have enough stats
  quartile_x(:, chop) = [];
  quartile_y(:, chop) = [];
  whisker_x(:,[chop, chop+nc]) = [];
  whisker_y(:,[chop, chop+nc]) = [];
  median_x(:, chop) = [];
  median_y(:, chop) = [];

  ## Add caps to the remaining whiskers
  cap_x = whisker_x;
  cap_x(1, :) -= 0.05;
  cap_x(2, :) += 0.05;
  cap_y = whisker_y([1, 1], :);

  #quartile_x,quartile_y
  #whisker_x,whisker_y
  #median_x,median_y
  #cap_x,cap_y

  ## Do the plot
  ## The series are always passed in the order box, whiskers, caps, medians,
  ## outliers, far outliers; for horizontal boxes x and y are swapped.
  if (vertical)
    if (isempty (plot_opts))
      h = plot (quartile_x, quartile_y, "b;;", whisker_x, whisker_y, "b;;", cap_x, cap_y, "b;;", median_x, median_y, "r;;", outliers_x, outliers_y, [symbol(1), "r;;"], outliers2_x, outliers2_y, [symbol(2), "r;;"]);
    else
      h = plot (quartile_x, quartile_y, "b;;", whisker_x, whisker_y, "b;;", cap_x, cap_y, "b;;", median_x, median_y, "r;;", outliers_x, outliers_y, [symbol(1), "r;;"], outliers2_x, outliers2_y, [symbol(2), "r;;"], plot_opts{:});
    endif
  else
    if (isempty (plot_opts))
      h = plot (quartile_y, quartile_x, "b;;", whisker_y, whisker_x, "b;;", cap_y, cap_x, "b;;", median_y, median_x, "r;;", outliers_y, outliers_x, [symbol(1), "r;;"], outliers2_y, outliers2_x, [symbol(2), "r;;"]);
    else
      h = plot (quartile_y, quartile_x, "b;;", whisker_y, whisker_x, "b;;", cap_y, cap_x, "b;;", median_y, median_x, "r;;", outliers_y, outliers_x, [symbol(1), "r;;"], outliers2_y, outliers2_x, [symbol(2), "r;;"], plot_opts{:});
    endif
  endif

  % Distribute handles
  % NOTE(review): this assumes plot returns one handle per plotted column, in
  % the order the series were passed -- confirm.  Under that assumption the
  % hs.whisker range (twice the number of whisker columns) takes in the cap
  % lines as well, since cap_x has the same number of columns as whisker_x.
  nq = 1:size(quartile_x,2);
  hs.box = h(nq);
  nw = nq(end) + [1:2*size(whisker_x,2)];
  hs.whisker = h(nw);
  nm = nw(end)+ [1:size(median_x,2)];
  hs.median = h(nm);
  % no stays equal to nm when there are no near outliers, so no(end) below
  % is still the last handle index used so far
  no = nm;
  if ~isempty (outliers_y)
    no = nm(end) + [1:size(outliers_y,2)];
    hs.outliers = h(no);
  end
  if ~isempty (outliers2_y)
    no2 = no(end) + [1:size(outliers2_y,2)];
    hs.outliers2 = h(no2);
  end

endfunction

%!demo
%! axis ([0,3]);
%! boxplot ({randn(10,1)*5+140, randn(13,1)*8+135});
%! set(gca (), "xtick", [1 2], "xticklabel", {"girls", "boys"})
%! title ("Grade 3 heights");
statistics-1.3.0/inst/burrcdf.m0000644000000000000000000000661112776476211014636 0ustar 00000000000000
## Copyright (C) 2016 Dag Lyberg
## Copyright (C) 1995-2015 Kurt Hornik
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING. If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {} {} burrcdf (@var{x}, @var{alpha}, @var{c}, @var{k})
## For each element of @var{x}, compute the cumulative distribution function
## (CDF) at @var{x} of the Burr distribution with scale parameter @var{alpha}
## and shape parameters @var{c} and @var{k}.
## @end deftypefn

## Author: Dag Lyberg
## Description: CDF of the Burr distribution

function cdf = burrcdf (x, alpha, c, k)

  if (nargin != 4)
    print_usage ();
  endif

  ## With scalar parameters X is used as is; otherwise all four inputs are
  ## brought to one common size.
  scalar_params = isscalar (alpha) && isscalar (c) && isscalar (k);
  if (! scalar_params)
    [status, x, alpha, c, k] = common_size (x, alpha, c, k);
    if (status > 0)
      error ("burrcdf: X, ALPHA, C AND K must be of common size or scalars");
    endif
  endif

  if (iscomplex (x) || iscomplex (alpha) || iscomplex (c) || iscomplex (k))
    error ("burrcdf: X, ALPHA, C AND K must not be complex");
  endif

  ## The result is single as soon as one input is single
  cls = "double";
  if (isa (x, "single") || isa (alpha, "single") ...
      || isa (c, "single") || isa (k, "single"))
    cls = "single";
  endif
  cdf = zeros (size (x), cls);

  ## NaN input or a non-positive (or NaN) parameter gives NaN
  undefined = isnan (x) | ! (alpha > 0) | ! (c > 0) | ! (k > 0);
  cdf(undefined) = NaN;

  ## Positive X with finite positive parameters; everything that is neither
  ## here nor undefined keeps the initial value 0.
  support = (x > 0) ...
            & (0 < alpha) & (alpha < Inf) ...
            & (0 < c) & (c < Inf) ...
            & (0 < k) & (k < Inf);
  if (scalar_params)
    z = (x(support) / alpha).^c;
    cdf(support) = 1 - (1 + z).^(-k);
  else
    z = (x(support) ./ alpha(support)).^c(support);
    cdf(support) = 1 - (1 + z).^(-k(support));
  endif

endfunction


%!shared x,y
%! x = [-1, 0, 1, 2, Inf];
%!
y = [0, 0, 1/2, 2/3, 1]; %!assert (burrcdf (x, ones(1,5), ones (1,5), ones (1,5)), y, eps) %!assert (burrcdf (x, 1, 1, 1), y, eps) %!assert (burrcdf (x, [1, 1, NaN, 1, 1], 1, 1), [y(1:2), NaN, y(4:5)], eps) %!assert (burrcdf (x, 1, [1, 1, NaN, 1, 1], 1), [y(1:2), NaN, y(4:5)], eps) %!assert (burrcdf (x, 1, 1, [1, 1, NaN, 1, 1]), [y(1:2), NaN, y(4:5)], eps) %!assert (burrcdf ([x, NaN], 1, 1, 1), [y, NaN], eps) ## Test class of input preserved %!assert (burrcdf (single ([x, NaN]), 1, 1, 1), single ([y, NaN]), eps('single')) %!assert (burrcdf ([x, NaN], single (1), 1, 1), single ([y, NaN]), eps('single')) %!assert (burrcdf ([x, NaN], 1, single (1), 1), single ([y, NaN]), eps('single')) %!assert (burrcdf ([x, NaN], 1, 1, single (1)), single ([y, NaN]), eps('single')) ## Test input validation %!error burrcdf () %!error burrcdf (1) %!error burrcdf (1,2) %!error burrcdf (1,2,3) %!error burrcdf (1,2,3,4,5) %!error burrcdf (ones (3), ones (2), ones(2), ones(2)) %!error burrcdf (ones (2), ones (3), ones(2), ones(2)) %!error burrcdf (ones (2), ones (2), ones(3), ones(2)) %!error burrcdf (ones (2), ones (2), ones(2), ones(3)) %!error burrcdf (i, 2, 2, 2) %!error burrcdf (2, i, 2, 2) %!error burrcdf (2, 2, i, 2) %!error burrcdf (2, 2, 2, i) statistics-1.3.0/inst/burrinv.m0000644000000000000000000000716612776476211014704 0ustar 00000000000000## Copyright (C) 2016 Dag Lyberg ## Copyright (C) 1995-2015 Kurt Hornik ## ## This file is part of Octave. ## ## Octave is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or (at ## your option) any later version. ## ## Octave is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING. If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {} {} burrinv (@var{x}, @var{alpha}, @var{c}, @var{k})
## For each element of @var{x}, compute the quantile (the inverse of the CDF)
## at @var{x} of the Burr distribution with scale parameter @var{alpha} and
## shape parameters @var{c} and @var{k}.
##
## Probabilities outside [0, 1], NaN inputs and non-positive parameters give
## NaN; a probability of 1 gives Inf and a probability of 0 gives 0.
## @end deftypefn

## Author: Dag Lyberg
## Description: Quantile function of the Burr distribution

function inv = burrinv (x, alpha, c, k)

  if (nargin != 4)
    print_usage ();
  endif

  if (! isscalar (alpha) || ! isscalar (c) || ! isscalar (k) )
    [retval, x, alpha, c, k] = common_size (x, alpha, c, k);
    if (retval > 0)
      error ("burrinv: X, ALPHA, C AND K must be of common size or scalars");
    endif
  endif

  if (iscomplex (x) || iscomplex(alpha) || iscomplex (c) || iscomplex (k))
    error ("burrinv: X, ALPHA, C AND K must not be complex");
  endif

  ## The result is single as soon as one input is single
  if (isa (x, "single") || isa (alpha, "single") || isa (c, "single") ...
      || isa (k, "single"))
    inv = zeros (size (x), "single");
  else
    inv = zeros (size (x));
  endif

  ## Invalid probabilities or parameters
  j = isnan (x) | (x < 0) | (x > 1) | ! (alpha > 0) | ! (c > 0) | ! (k > 0);
  inv(j) = NaN;

  ## Probability one maps to the upper end of the support
  j = (x == 1) & (0 < alpha) & (alpha < Inf) & (0 < c) & (c < Inf) ...
      & (0 < k) & (k < Inf);
  inv(j) = Inf;

  ## Interior probabilities.  Solving
  ##   p = 1 - (1 + (q / alpha)^c)^(-k)
  ## for q gives
  ##   q = alpha * ((1 - p)^(-1/k) - 1)^(1/c),
  ## i.e. alpha scales the quantile; it must not divide the probability.
  j = (0 < x) & (x < 1) & (0 < alpha) & (alpha < Inf) & (0 < c) & (c < Inf) ...
      & (0 < k) & (k < Inf);
  if (isscalar (alpha) && isscalar(c) && isscalar(k))
    inv(j) = alpha * ((1 - x(j)).^(-1 / k) - 1).^(1 / c);
  else
    inv(j) = alpha(j) .* ((1 - x(j)).^(-1 ./ k(j)) - 1).^(1 ./ c(j));
  endif

endfunction


%!shared x,y
%! x = [-Inf, -1, 0, 1/2, 1, 2, Inf];
%!
y = [NaN, NaN, 0, 1 , Inf, NaN, NaN]; %!assert (burrinv (x, ones (1,7), ones (1,7), ones(1,7)), y, eps) %!assert (burrinv (x, 1, 1, 1), y, eps) %!assert (burrinv (x, [1, 1, 1, NaN, 1, 1, 1], 1, 1), [y(1:3), NaN, y(5:7)], eps) %!assert (burrinv (x, 1, [1, 1, 1, NaN, 1, 1, 1], 1), [y(1:3), NaN, y(5:7)], eps) %!assert (burrinv (x, 1, 1, [1, 1, 1, NaN, 1, 1, 1]), [y(1:3), NaN, y(5:7)], eps) %!assert (burrinv ([x, NaN], 1, 1, 1), [y, NaN], eps) ## Test class of input preserved %!assert (burrinv (single ([x, NaN]), 1, 1, 1), single ([y, NaN]), eps('single')) %!assert (burrinv ([x, NaN], single (1), 1, 1), single ([y, NaN]), eps('single')) %!assert (burrinv ([x, NaN], 1, single (1), 1), single ([y, NaN]), eps('single')) %!assert (burrinv ([x, NaN], 1, 1, single (1)), single ([y, NaN]), eps('single')) ## Test input validation %!error burrinv () %!error burrinv (1) %!error burrinv (1,2) %!error burrinv (1,2,3) %!error burrinv (1,2,3,4,5) %!error burrinv (ones (3), ones (2), ones(2), ones(2)) %!error burrinv (ones (2), ones (3), ones(2), ones(2)) %!error burrinv (ones (2), ones (2), ones(3), ones(2)) %!error burrinv (ones (2), ones (2), ones(2), ones(3)) %!error burrinv (i, 2, 2, 2) %!error burrinv (2, i, 2, 2) %!error burrinv (2, 2, i, 2) %!error burrinv (2, 2, 2, i) statistics-1.3.0/inst/burrpdf.m0000644000000000000000000000666012776476211014657 0ustar 00000000000000## Copyright (C) 2016 Dag Lyberg ## Copyright (C) 1995-2015 Kurt Hornik ## ## This file is part of Octave. ## ## Octave is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or (at ## your option) any later version. ## ## Octave is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING. If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {} {} burrpdf (@var{x}, @var{alpha}, @var{c}, @var{k})
## For each element of @var{x}, compute the probability density function (PDF)
## at @var{x} of the Burr distribution with scale parameter @var{alpha} and
## shape parameters @var{c} and @var{k}.
##
## NaN inputs and non-positive parameters give NaN; the density is 0 for
## @var{x} <= 0 and for @var{x} = Inf.
## @end deftypefn

## Author: Dag Lyberg
## Description: PDF of the Burr distribution

function pdf = burrpdf (x, alpha, c, k)

  if (nargin != 4)
    print_usage ();
  endif

  if (! isscalar (alpha) || ! isscalar (c) || ! isscalar (k) )
    [retval, x, alpha, c, k] = common_size (x, alpha, c, k);
    if (retval > 0)
      error ("burrpdf: X, ALPHA, C AND K must be of common size or scalars");
    endif
  endif

  if (iscomplex (x) || iscomplex(alpha) || iscomplex (c) || iscomplex (k))
    error ("burrpdf: X, ALPHA, C AND K must not be complex");
  endif

  ## The result is single as soon as one input is single
  if (isa (x, "single") || isa (alpha, "single") ...
      || isa (c, "single") || isa (k, "single"))
    pdf = zeros (size (x), "single");
  else
    pdf = zeros (size (x));
  endif

  ## Invalid input or parameters
  j = isnan (x) | ! (alpha > 0) | ! (c > 0) | ! (k > 0);
  pdf(j) = NaN;

  ## Finite positive X with finite positive parameters.  X = Inf is left at
  ## the initial value 0: evaluating the formula there gives Inf / Inf = NaN
  ## whenever c > 1.
  j = (x > 0) & (x < Inf) & (0 < alpha) & (alpha < Inf) ...
      & (0 < c) & (c < Inf) & (0 < k) & (k < Inf);
  if (isscalar (alpha) && isscalar (c) && isscalar(k))
    pdf(j) = (c * k / alpha) .* (x(j) / alpha).^(c-1) ./ ...
             (1 + (x(j) / alpha).^c).^(k + 1);
  else
    ## Same formula as the scalar branch: X is divided by alpha in the
    ## numerator as well as in the denominator.
    pdf(j) = (c(j) .* k(j) ./ alpha(j)) .* (x(j) ./ alpha(j)).^(c(j)-1) ./ ...
             (1 + (x(j) ./ alpha(j)).^c(j)).^(k(j) + 1);
  endif

endfunction


%!shared x,y
%! x = [-1, 0, 1, 2, Inf];
%!
y = [0, 0, 1/4, 1/9, 0]; %!assert (burrpdf (x, ones(1,5), ones (1,5), ones (1,5)), y) %!assert (burrpdf (x, 1, 1, 1), y) %!assert (burrpdf (x, [1, 1, NaN, 1, 1], 1, 1), [y(1:2), NaN, y(4:5)]) %!assert (burrpdf (x, 1, [1, 1, NaN, 1, 1], 1), [y(1:2), NaN, y(4:5)]) %!assert (burrpdf (x, 1, 1, [1, 1, NaN, 1, 1]), [y(1:2), NaN, y(4:5)]) %!assert (burrpdf ([x, NaN], 1, 1, 1), [y, NaN]) ## Test class of input preserved %!assert (burrpdf (single ([x, NaN]), 1, 1, 1), single ([y, NaN])) %!assert (burrpdf ([x, NaN], single (1), 1, 1), single ([y, NaN])) %!assert (burrpdf ([x, NaN], 1, single (1), 1), single ([y, NaN])) %!assert (burrpdf ([x, NaN], 1, 1, single (1)), single ([y, NaN])) ## Test input validation %!error burrpdf () %!error burrpdf (1) %!error burrpdf (1,2) %!error burrpdf (1,2,3) %!error burrpdf (1,2,3,4,5) %!error burrpdf (ones (3), ones (2), ones(2), ones(2)) %!error burrpdf (ones (2), ones (3), ones(2), ones(2)) %!error burrpdf (ones (2), ones (2), ones(3), ones(2)) %!error burrpdf (ones (2), ones (2), ones(2), ones(3)) %!error burrpdf (i, 2, 2, 2) %!error burrpdf (2, i, 2, 2) %!error burrpdf (2, 2, i, 2) %!error burrpdf (2, 2, 2, i) statistics-1.3.0/inst/burrrnd.m0000644000000000000000000001203012776476211014655 0ustar 00000000000000## Copyright (C) 2016 Dag Lyberg ## Copyright (C) 1995-2015 Kurt Hornik ## ## This file is part of Octave. ## ## Octave is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or (at ## your option) any later version. ## ## Octave is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Octave; see the file COPYING. If not, see ## . 
## -*- texinfo -*-
## @deftypefn {} {} burrrnd (@var{alpha}, @var{c}, @var{k})
## @deftypefnx {} {} burrrnd (@var{alpha}, @var{c}, @var{k}, @var{rows})
## @deftypefnx {} {} burrrnd (@var{alpha}, @var{c}, @var{k}, @var{rows}, @var{cols}, @dots{})
## @deftypefnx {} {} burrrnd (@var{alpha}, @var{c}, @var{k}, [@var{sz}])
## Return a matrix of random samples from the Burr distribution
## with scale parameter @var{alpha} and shape parameters @var{c} and @var{k}.
##
## When called with a single size argument, return a square matrix with
## the dimension specified.  When called with more than one scalar argument the
## first two arguments are taken as the number of rows and columns and any
## further arguments specify additional matrix dimensions.  The size may also
## be specified with a vector of dimensions @var{sz}.
##
## If no size arguments are given then the result matrix is the common size of
## @var{alpha}, @var{c} and @var{k}.
##
## Samples are drawn by inverting the distribution function: for a uniform
## deviate @var{u} the sample is
## @code{alpha * ((1 - u)^(-1/k) - 1)^(1/c)}.  Elements whose parameters are
## not finite and positive are returned as NaN.
## @end deftypefn

## Author: Dag Lyberg
## Description: Random deviates from the Burr distribution

function rnd = burrrnd (alpha, c, k, varargin)

  if (nargin < 3)
    print_usage ();
  endif

  ## Expand scalars so that array parameters share one size.
  if (! isscalar (alpha) || ! isscalar (c) || ! isscalar (k))
    [retval, alpha, c, k] = common_size (alpha, c, k);
    if (retval > 0)
      error ("burrrnd: ALPHA, C and K must be of common size or scalars");
    endif
  endif

  if (iscomplex (alpha) || iscomplex (c) || iscomplex (k))
    error ("burrrnd: ALPHA, C and K must not be complex");
  endif

  ## Work out the requested output size.
  if (nargin == 3)
    sz = size (alpha);
  elseif (nargin == 4)
    if (isscalar (varargin{1}) && varargin{1} >= 0)
      sz = [varargin{1}, varargin{1}];
    elseif (isrow (varargin{1}) && all (varargin{1} >= 0))
      sz = varargin{1};
    else
      error ("burrrnd: dimension vector must be row vector of non-negative integers");
    endif
  elseif (nargin > 4)
    if (any (cellfun (@(x) (! isscalar (x) || x < 0), varargin)))
      error ("burrrnd: dimensions must be non-negative integers");
    endif
    sz = [varargin{:}];
  endif

  ## After common_size the three parameters have identical size, so checking
  ## ALPHA is sufficient.
  if (! isscalar (alpha) && ! isequal (size (alpha), sz))
    error ("burrrnd: ALPHA, C and K must be scalar or of size SZ");
  endif

  if (isa (alpha, "single") || isa (c, "single") || isa (k, "single"))
    cls = "single";
  else
    cls = "double";
  endif

  if (isscalar (alpha) && isscalar (c) && isscalar (k))
    if ((0 < alpha) && (alpha < Inf) && (0 < c) && (c < Inf) ...
        && (0 < k) && (k < Inf))
      rnd = rand (sz, cls);
      ## Inverse of F(x) = 1 - (1 + (x/alpha)^c)^(-k)
      rnd(:) = alpha * ((1 - rnd(:)).^(-1 / k) - 1).^(1 / c);
    else
      rnd = NaN (sz, cls);
    endif
  else
    rnd = NaN (sz, cls);

    ## Element-wise mask of valid parameter triples (must use "&", not "&&").
    j = (0 < alpha) & (alpha < Inf) & (0 < c) & (c < Inf) ...
        & (0 < k) & (k < Inf);
    ## Draw a full-size uniform array so that every term indexed by J below
    ## has the same shape.
    u = rand (sz, cls);
    rnd(j) = alpha(j) .* ((1 - u(j)).^(-1 ./ k(j)) - 1).^(1 ./ c(j));
  endif

endfunction


%!assert (size (burrrnd (1, 1, 1)), [1 1])
%!assert (size (burrrnd (ones (2,1), 1, 1)), [2, 1])
%!assert (size (burrrnd (ones (2,2), 1, 1)), [2, 2])
%!assert (size (burrrnd (1, ones (2,1), 1)), [2, 1])
%!assert (size (burrrnd (1, ones (2,2), 1)), [2, 2])
%!assert (size (burrrnd (1, 1, ones (2,1))), [2, 1])
%!assert (size (burrrnd (1, 1, ones (2,2))), [2, 2])
%!assert (size (burrrnd (1, 1, 1, 3)), [3, 3])
%!assert (size (burrrnd (1, 1, 1, [4 1])), [4, 1])
%!assert (size (burrrnd (1, 1, 1, 4, 1)), [4, 1])

## Test class of input preserved
%!assert (class (burrrnd (1,1,1)), "double")
%!assert (class (burrrnd (single (1),1,1)), "single")
%!assert (class (burrrnd (single ([1 1]),1,1)), "single")
%!assert (class (burrrnd (1,single (1),1)), "single")
%!assert (class (burrrnd (1,single ([1 1]),1)), "single")
%!assert (class (burrrnd (1,1,single (1))), "single")
%!assert (class (burrrnd (1,1,single ([1 1]))), "single")

## Test that samples are real and non-negative, also for ALPHA != 1 and
## for non-scalar parameters
%!test
%! r = burrrnd (0.5, 1, 1, 100, 1);
%! assert (isreal (r) && all (r >= 0));
%!test
%! r = burrrnd ([1 2 3], 2, 3);
%! assert (size (r), [1, 3]);
%! assert (isreal (r) && all (r >= 0));
%!test
%! r = burrrnd ([1 -1 3], 2, 3);
%! assert (isnan (r), [false, true, false]);

## Test input validation
%!error burrrnd ()
%!error burrrnd (1)
%!error burrrnd (1,2)
%!error burrrnd (ones (3), ones (2), ones (2), 2)
%!error burrrnd (ones (2), ones (3), ones (2), 2)
%!error burrrnd (ones (2), ones (2), ones (3), 2)
%!error burrrnd (i, 2, 2)
%!error burrrnd (2, i, 2)
%!error burrrnd (2, 2, i)
%!error burrrnd (4,2,2, -1)
%!error burrrnd (4,2,2, 
ones (2))
%!error burrrnd (4,2,2, [2 -1 2])
%!error burrrnd (4*ones (2),2,2, 3)
%!error burrrnd (4*ones (2),2,2, [3, 2])
%!error burrrnd (4*ones (2),2,2, 3, 2)
statistics-1.3.0/inst/caseread.m0000755000000000000000000000341412776476211014757 0ustar 00000000000000
## Copyright (C) 2008 Bill Denney
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{names} =} caseread (@var{filename})
## Read case names from an ascii file.
##
## Every line of @var{filename} is read as text and the lines are stacked
## into the character matrix @var{names}, one line per row.
## @seealso{casewrite, tblread, tblwrite, csv2cell, cell2csv, fopen}
## @end deftypefn

## Author: Bill Denney
## Description: Read strings from a file

function names = caseread (f="")

  ## Exactly one argument, and it must not be empty
  if (nargin != 1)
    print_usage ();
  endif
  if (isempty (f))
    ## FIXME: open a file dialog box in this case when a file dialog box
    ## becomes available
    error ("caseread: filename must be given")
  endif

  [fid msg] = fopen (f, "rt");
  if (fid < 0 || ! isempty (msg))
    error ("caseread: cannot open %s: %s", f, msg);
  endif

  ## Collect lines until fgetl signals end of file with a non-string value
  collected = {};
  while (true)
    txt = fgetl (fid);
    if (! ischar (txt))
      break;
    endif
    collected{end+1} = txt;
  endwhile

  if (fclose (fid) < 0)
    error ("caseread: error closing f")
  endif

  ## Stack the lines into a blank-padded character matrix
  names = strvcat (collected);

endfunction

## Tests
%!shared n
%! 
n = ["a ";"bcd";"ef "];
%!assert (caseread ("caseread.dat"), n);
statistics-1.3.0/inst/casewrite.m0000755000000000000000000000420112776476211015171 0ustar 00000000000000
## Copyright (C) 2008 Bill Denney
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {} casewrite (@var{strmat}, @var{filename})
## Write case names to an ascii file.
##
## Each row of the character matrix @var{strmat} is written to
## @var{filename} as one line, after trailing blanks have been removed.
## @seealso{caseread, tblread, tblwrite, csv2cell, cell2csv, fopen}
## @end deftypefn

## Author: Bill Denney
## Description: Write strings to a file

function names = casewrite (s="", f="")

  ## Both arguments are required
  if (nargin != 2)
    print_usage ();
  endif

  ## The filename is validated before the string matrix
  if (isempty (f))
    ## FIXME: open a file dialog box in this case when a file dialog box
    ## becomes available
    error ("casewrite: filename must be given")
  endif

  if (isempty (s))
    error ("casewrite: strmat must be given")
  elseif (! ischar (s))
    error ("casewrite: strmat must be a character matrix")
  elseif (ndims (s) != 2)
    error ("casewrite: strmat must be two dimensional")
  endif

  [fid msg] = fopen (f, "wt");
  if (fid < 0 || ! isempty (msg))
    error ("casewrite: cannot open %s for writing: %s", f, msg);
  endif

  ## One output line per row of the matrix
  nlines = rows (s);
  r = 1;
  while (r <= nlines)
    fputs (fid, sprintf ("%s\n", deblank (s(r,:))));
    r++;
  endwhile

  if (fclose (fid) < 0)
    error ("casewrite: error closing f")
  endif

endfunction

%!test
%! 
fname = [tempname() ".dat"]; %! unwind_protect %! s = ["a ";"bcd";"ef "]; %! casewrite (s, fname) %! names = caseread (fname); %! unwind_protect_cleanup %! unlink (fname); %! end_unwind_protect %! assert(names, s); statistics-1.3.0/inst/cdf.m0000644000000000000000000001012412776476211013735 0ustar 00000000000000## Copyright (C) 2013 Pantxo Diribarne ## ## This program is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program. If not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{retval} =} cdf (@var{name}, @var{X}, @dots{}) ## Return cumulative density function of @var{name} function for value ## @var{x}. ## This is a wrapper around various @var{name}cdf and @var{name}_cdf ## functions. See the individual functions help to learn the signification of ## the arguments after @var{x}. 
Supported functions and corresponding number of
## additional arguments are:
##
## @multitable @columnfractions 0.02 0.3 0.45 0.2
## @headitem @tab function @tab alternative @tab args
## @item @tab "beta" @tab "beta" @tab 2
## @item @tab "bino" @tab "binomial" @tab 2
## @item @tab "cauchy" @tab @tab 2
## @item @tab "chi2" @tab "chisquare" @tab 1
## @item @tab "discrete" @tab @tab 2
## @item @tab "exp" @tab "exponential" @tab 1
## @item @tab "f" @tab @tab 2
## @item @tab "gam" @tab "gamma" @tab 2
## @item @tab "geo" @tab "geometric" @tab 1
## @item @tab "gev" @tab "generalized extreme value" @tab 3
## @item @tab "hyge" @tab "hypergeometric" @tab 3
## @item @tab "kolmogorov_smirnov" @tab @tab 1
## @item @tab "laplace" @tab @tab 0
## @item @tab "logistic" @tab @tab 0
## @item @tab "logn" @tab "lognormal" @tab 2
## @item @tab "norm" @tab "normal" @tab 2
## @item @tab "poiss" @tab "poisson" @tab 1
## @item @tab "rayl" @tab "rayleigh" @tab 1
## @item @tab "t" @tab @tab 1
## @item @tab "unif" @tab "uniform" @tab 2
## @item @tab "wbl" @tab "weibull" @tab 2
## @end multitable
##
## @seealso{betacdf, binocdf, cauchy_cdf, chi2cdf, discrete_cdf,
## expcdf, fcdf, gamcdf, geocdf, gevcdf, hygecdf,
## kolmogorov_smirnov_cdf, laplace_cdf, logistic_cdf, logncdf,
## normcdf, poisscdf, raylcdf, tcdf, unifcdf, wblcdf}
## @end deftypefn

function [retval] = cdf (varargin)

  ## Dispatch table stored as consecutive triples:
  ##   {accepted names}, function handle, number of extra arguments
  persistent allcdf = {{"beta", "beta"}, @betacdf, 2, ...
                       {"bino", "binomial"}, @binocdf, 2, ...
                       {"cauchy"}, @cauchy_cdf, 2, ...
                       {"chi2", "chisquare"}, @chi2cdf, 1, ...
                       {"discrete"}, @discrete_cdf, 2, ...
                       {"exp", "exponential"}, @expcdf, 1, ...
                       {"f"}, @fcdf, 2, ...
                       {"gam", "gamma"}, @gamcdf, 2, ...
                       {"geo", "geometric"}, @geocdf, 1, ...
                       {"gev", "generalized extreme value"}, @gevcdf, 3, ...
                       {"hyge", "hypergeometric"}, @hygecdf, 3, ...
                       {"kolmogorov_smirnov"}, @kolmogorov_smirnov_cdf, 1, ...
                       {"laplace"}, @laplace_cdf, 0, ...
                       {"logistic"}, @logistic_cdf, 0, ... # ML has 2 args here
                       {"logn", "lognormal"}, @logncdf, 2, ...
                       {"norm", "normal"}, @normcdf, 2, ...
                       {"poiss", "poisson"}, @poisscdf, 1, ...
                       {"rayl", "rayleigh"}, @raylcdf, 1, ...
                       {"t"}, @tcdf, 1, ...
                       {"unif", "uniform"}, @unifcdf, 2, ...
                       {"wbl", "weibull"}, @wblcdf, 2};

  ## A distribution name and an evaluation point are mandatory
  if (numel (varargin) < 2 || ! ischar (varargin{1}))
    print_usage ();
  endif

  name = varargin{1};
  x = varargin{2};
  params = varargin(3:end);

  ## Split the table into its three columns
  aliases = allcdf(1:3:end);
  handles = allcdf(2:3:end);
  counts = allcdf(3:3:end);

  ## Case-insensitive lookup of the requested distribution
  hit = cellfun (@(names) any (strcmpi (name, names)), aliases);
  if (! any (hit))
    error ("cdf: %s not implemented", name);
  endif

  pos = find (hit);
  if (numel (params) != counts{pos})
    error ("cdf: %s requires %d arguments", name, counts{pos})
  endif

  retval = feval (handles{pos}, x, params{:});

endfunction

%!test
%! assert(cdf ('norm', 1, 0, 1), normcdf (1, 0, 1))
statistics-1.3.0/inst/chi2stat.m0000755000000000000000000000445012776476211014732 0ustar 00000000000000
## Copyright (C) 2006, 2007 Arno Onken
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{m}, @var{v}] =} chi2stat (@var{n})
## Compute mean and variance of the chi-square distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{n} is the parameter of the chi-square distribution. 
The elements
## of @var{n} must be positive
## @end itemize
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{m} is the mean of the chi-square distribution
##
## @item
## @var{v} is the variance of the chi-square distribution
## @end itemize
##
## @subheading Example
##
## @example
## @group
## n = 1:6;
## [m, v] = chi2stat (n)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Moments of the chi-square distribution

function [m, v] = chi2stat (n)

  # Exactly one argument is accepted
  if (nargin != 1)
    print_usage ();
  endif

  if (! isempty (n) && ! ismatrix (n))
    error ("chi2stat: n must be a numeric matrix");
  endif

  # Mean is n, variance is 2n
  m = n;
  v = 2 .* n;

  # Entries that are not finite and positive (including NaN) give NaN moments
  invalid = ! ((n > 0) & (n < Inf));
  m(invalid) = NaN;
  v(invalid) = NaN;

endfunction

%!test
%! n = 1:6;
%! [m, v] = chi2stat (n);
%! assert (m, n);
%! assert (v, [2, 4, 6, 8, 10, 12], 0.001);
statistics-1.3.0/inst/cl_multinom.m0000755000000000000000000001201112776476211015523 0ustar 00000000000000
## Copyright (C) 2009 Levente Torok
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details. 
## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## ## @deftypefn {Function File} {@var{CL} =} cl_multinom (@var{x}, @var{N}, @var{b}, @var{calculation_type} ) - Confidence level of multinomial portions ## Returns confidence level of multinomial parameters estimated @math{ p = x / sum(x) } with predefined confidence interval @var{b}. ## Finite population is also considered. ## ## This function calculates the level of confidence at which the samples represent the true distribution ## given that there is a predefined tolerance (confidence interval). ## This is the upside down case of the typical excercises at which we want to get the confidence interval ## given the confidence level (and the estimated parameters of the underlying distribution). ## But once we accept (lets say at elections) that we have a standard predefined ## maximal acceptable error rate (e.g. @var{b}=0.02 ) in the estimation and we just want to know that how sure we ## can be that the measured proportions are the same as in the ## entire population (ie. the expected value and mean of the samples are roghly the same) we need to use this function. ## ## @subheading Arguments ## @itemize @bullet ## @item @var{x} : int vector : sample frequencies bins ## @item @var{N} : int : Population size that was sampled by x. If N 4) print_usage; elseif (!ischar (calculation_type)) error ("Argument calculation_type must be a string"); endif k = rows(x); nn = sum(x); p = x / nn; if (isscalar( b )) if (b==0) b=0.02; endif b = ones( rows(x), 1 ) * b; if (b<0) b=1 ./ max( x, 1 ); endif endif bb = b .* b; if (N==nn) CL = 1; return; endif if (N ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or ## (at your option) any later version. 
## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} @var{Y} = cmdscale (@var{D}) ## @deftypefnx{Function File} [@var{Y}, @var{e} ] = cmdscale (@var{D}) ## Classical multidimensional scaling of a matrix. ## ## Takes an @var{n} by @var{n} distance (or difference, similarity, or ## dissimilarity) matrix @var{D}. Returns @var{Y}, a matrix of @var{n} points ## with coordinates in @var{p} dimensional space which approximate those ## distances (or differences, similarities, or dissimilarities). Also returns ## the eigenvalues @var{e} of ## @code{@var{B} = -1/2 * @var{J} * (@var{D}.^2) * @var{J}}, where ## @code{J = eye(@var{n}) - ones(@var{n},@var{n})/@var{n}}. @var{p}, the number ## of columns of @var{Y}, is equal to the number of positive real eigenvalues of ## @var{B}. ## ## @var{D} can be a full or sparse matrix or a vector of length ## @code{@var{n}*(@var{n}-1)/2} containing the upper triangular elements (like ## the output of the @code{pdist} function). It must be symmetric with ## non-negative entries whose values are further restricted by the type of ## matrix being represented: ## ## * If @var{D} is either a distance, dissimilarity, or difference matrix, then ## it must have zero entries along the main diagonal. In this case the points ## @var{Y} equal or approximate the distances given by @var{D}. ## ## * If @var{D} is a similarity matrix, the elements must all be less than or ## equal to one, with ones along the the main diagonal. In this case the points ## @var{Y} equal or approximate the distances given by ## @code{@var{D} = sqrt(ones(@var{n},@var{n})-@var{D})}. 
##
## @var{D} is a Euclidean matrix if and only if @var{B} is positive
## semi-definite. When this is the case, then @var{Y} is an exact representation
## of the distances given in @var{D}. If @var{D} is non-Euclidean, @var{Y} only
## approximates the distance given in @var{D}. The approximation used by
## @code{cmdscale} minimizes the statistical loss function known as
## @var{strain}.
##
## The returned @var{Y} is an @var{n} by @var{p} matrix showing possible
## coordinates of the points in @var{p} dimensional space
## (@code{@var{p} < @var{n}}). The columns correspond to the positive
## eigenvalues of @var{B} in descending order. A translation, rotation, or
## reflection of the coordinates given by @var{Y} will satisfy the same distance
## matrix up to the limits of machine precision.
##
## For any @code{@var{k} <= @var{p}}, if the largest @var{k} positive
## eigenvalues of @var{B} are significantly greater in absolute magnitude than
## its other eigenvalues, the first @var{k} columns of @var{Y} provide a
## @var{k}-dimensional reduction of @var{Y} which approximates the distances
## given by @var{D}. The optional return @var{e} can be used to consider various
## values of @var{k}, or to evaluate the accuracy of specific dimension
## reductions (e.g., @code{@var{k} = 2}).
##
## Reference: Ingwer Borg and Patrick J.F. Groenen (2005), Modern
## Multidimensional Scaling, Second Edition, Springer, ISBN: 978-0-387-25150-9
## (Print) 978-0-387-28981-6 (Online)
##
## @seealso{pdist}
## @end deftypefn

## Author: JD Walsh
## Created: 2014-10-31
## Description: Classical multidimensional scaling
## Keywords: multidimensional-scaling mds distance clustering

## TO DO: include missing functions `mdscale' and `procrustes' in @seealso

function [Y, e] = cmdscale (D)

  % Check for matrix input.  Errors are raised with print_usage/error because
  % the old `usage' function is no longer available in current Octave.
  if (nargin ~= 1)
    print_usage ();
  elseif (~any (strcmp ({'matrix' 'scalar' 'range'}, typeinfo (D))))
    error ('cmdscale: input must be vector or matrix; see help');
  endif

  % If vector, convert to matrix; otherwise, check for square symmetric input
  if (isvector (D))
    D = squareform (D);
  elseif ((~issquare (D)) || (norm (D - D', 1) > 0))
    error ('cmdscale: matrix input must be square symmetric; see help');
  endif

  n = size (D, 1);

  % Check for valid format (see help above); If similarity matrix, convert
  if (any (D(:) < 0))
    error ('cmdscale: entries must be nonnegative; see help');
  elseif (trace (D) ~= 0)
    % A nonzero diagonal is only allowed for a similarity matrix: ones on the
    % diagonal and no entry larger than one.
    if ((~all (diag (D) == 1)) || (~all (D(:) <= 1)))
      error ('cmdscale: input must be distance vector or matrix; see help');
    endif
    D = sqrt (ones (n, n) - D);
  endif

  % Build centering matrix, perform double centering, extract eigenpairs
  J = eye (n) - ones (n, n) / n;
  B = -1 / 2 * J * (D .^ 2) * J;
  [Q, e] = eig (B);
  e = diag (e);
  etmp = e;
  e = sort (e, 'descend');

  % Remove complex eigenpairs (only possible due to machine approximation)
  if (iscomplex (etmp))
    cmp = (imag (etmp) == 0);
    etmp = etmp(cmp);
    Q = Q(:, cmp);
  endif

  % Order eigenpairs
  [etmp, ord] = sort (etmp, 'descend');
  Q = Q(:, ord);

  % Remove negative eigenpairs
  cmp = (etmp > 0);
  etmp = etmp(cmp);
  Q = Q(:, cmp);

  % Test for n-dimensional results: n points need at most n-1 dimensions, so
  % an n-th positive eigenvalue can only be numerical noise
  if (size (etmp, 1) == n)
    etmp = etmp(1:n-1);
    Q = Q(:, 1:n-1);
  endif

  % Build output matrix Y
  Y = Q * diag (sqrt (etmp));

endfunction

%!shared m, n, X, D
%! 
m = randi(100) + 1; n = randi(100) + 1; X = rand(m, n); D = pdist(X);
%!assert(norm(pdist(cmdscale(D))), norm(D), sqrt(eps))
%!assert(norm(pdist(cmdscale(squareform(D)))), norm(D), sqrt(eps))
statistics-1.3.0/inst/combnk.m0000755000000000000000000000466712776476211014474 0ustar 00000000000000
## Copyright (C) 2010 Soren Hauberg
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{c} =} combnk (@var{data}, @var{k})
## Return all combinations of @var{k} elements in @var{data}.
##
## @var{data} must be a vector (numeric, character or cell) and @var{k} a
## non-negative integer.  Each row of the result is one combination.
## @end deftypefn

function retval = combnk (data, k)

  ## Validate the arguments
  if (nargin != 2)
    print_usage;
  elseif (! isvector (data))
    error ("combnk: first input argument must be a vector");
  elseif (!isreal (k) || k != round (k) || k < 0)
    error ("combnk: second input argument must be a non-negative integer");
  endif

  ## Handle the trivial sizes directly and recurse for everything else
  nelem = numel (data);
  if (k > 0 && k < nelem)
    retval = __combnk__ (data, k);
  elseif (k == nelem)
    ## Only one combination: all the elements, as a single row
    retval = data (:).';
  else
    ## k == 0 or k > nelem: no combinations, empty result with k columns
    retval = resize (data, 0, k);
  endif

  ## For some odd reason Matlab seems to treat strings differently compared to other data-types...
  if (ischar (data))
    retval = flipud (retval);
  endif

endfunction

## Recursive worker: every combination starts with data(first) followed by a
## (k-1)-combination of the elements after it.
function retval = __combnk__ (data, k)

  ## Recursion stopping criteria
  if (k == 1)
    retval = data (:);
    return;
  endif

  ## The accumulator has to match the container type of the input
  if (iscell (data))
    retval = {};
  else
    retval = [];
  endif

  for first = 1:numel (data)
    tails = __combnk__ (data ((first+1):end), k-1);
    block = cat (2, repmat (data (first), rows (tails), 1), tails);
    if (isempty (block))
      continue;
    endif
    if (isempty (retval))
      retval = block;
    else
      retval = [retval; block];
    endif
  endfor

endfunction

%!demo
%! c = combnk (1:5, 2);
%! disp ("All pairs of integers between 1 and 5:");
%! disp (c);

%!test
%! c = combnk (1:3, 2);
%! assert (c, [1, 2; 1, 3; 2, 3]);

%!test
%! c = combnk (1:3, 6);
%! assert (isempty (c));

%!test
%! c = combnk ({1, 2, 3}, 2);
%! assert (c, {1, 2; 1, 3; 2, 3});

%!test
%! c = combnk ("hello", 2);
%! assert (c, ["lo"; "lo"; "ll"; "eo"; "el"; "el"; "ho"; "hl"; "hl"; "he"]);
statistics-1.3.0/inst/copulacdf.m0000755000000000000000000002174012776476211015152 0ustar 00000000000000
## Copyright (C) 2008 Arno Onken
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{p} =} copulacdf (@var{family}, @var{x}, @var{theta})
## @deftypefnx {Function File} {} copulacdf ('t', @var{x}, @var{theta}, @var{nu})
## Compute the cumulative distribution function of a copula family.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{family} is the copula family name. 
Currently, @var{family} can ## be @code{'Gaussian'} for the Gaussian family, @code{'t'} for the ## Student's t family, @code{'Clayton'} for the Clayton family, ## @code{'Gumbel'} for the Gumbel-Hougaard family, @code{'Frank'} for ## the Frank family, @code{'AMH'} for the Ali-Mikhail-Haq family, or ## @code{'FGM'} for the Farlie-Gumbel-Morgenstern family. ## ## @item ## @var{x} is the support where each row corresponds to an observation. ## ## @item ## @var{theta} is the parameter of the copula. For the Gaussian and ## Student's t copula, @var{theta} must be a correlation matrix. For ## bivariate copulas @var{theta} can also be a correlation coefficient. ## For the Clayton family, the Gumbel-Hougaard family, the Frank family, ## and the Ali-Mikhail-Haq family, @var{theta} must be a vector with the ## same number of elements as observations in @var{x} or be scalar. For ## the Farlie-Gumbel-Morgenstern family, @var{theta} must be a matrix of ## coefficients for the Farlie-Gumbel-Morgenstern polynomial where each ## row corresponds to one set of coefficients for an observation in ## @var{x}. A single row is expanded. The coefficients are in binary ## order. ## ## @item ## @var{nu} is the degrees of freedom for the Student's t family. ## @var{nu} must be a vector with the same number of elements as ## observations in @var{x} or be scalar. ## @end itemize ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{p} is the cumulative distribution of the copula at each row of ## @var{x} and corresponding parameter @var{theta}. ## @end itemize ## ## @subheading Examples ## ## @example ## @group ## x = [0.2:0.2:0.6; 0.2:0.2:0.6]; ## theta = [1; 2]; ## p = copulacdf ("Clayton", x, theta) ## @end group ## ## @group ## x = [0.2:0.2:0.6; 0.2:0.1:0.4]; ## theta = [0.2, 0.1, 0.1, 0.05]; ## p = copulacdf ("FGM", x, theta) ## @end group ## @end example ## ## @subheading References ## ## @enumerate ## @item ## Roger B. Nelsen. @cite{An Introduction to Copulas}. 
Springer,
## New York, second edition, 2006.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: CDF of a copula family

function p = copulacdf (family, x, theta, nu)

  # Check arguments: three arguments for every family, a fourth one (nu) is
  # only accepted for the Student's t family
  if (nargin != 3 && (nargin != 4 || ! strcmpi (family, "t")))
    print_usage ();
  endif

  if (! ischar (family))
    error ("copulacdf: family must be one of 'Gaussian', 't', 'Clayton', 'Gumbel', 'Frank', 'AMH', and 'FGM'");
  endif

  if (! isempty (x) && ! ismatrix (x))
    error ("copulacdf: x must be a numeric matrix");
  endif

  # n observations (rows) of dimension d (columns)
  [n, d] = size (x);

  lower_family = lower (family);

  # Check family and copula parameters
  switch (lower_family)

    case {"gaussian", "t"}
      # Family with a covariance matrix
      if (d == 2 && isscalar (theta))
        # Expand a scalar to a correlation matrix
        theta = [1, theta; theta, 1];
      endif
      if (any (size (theta) != [d, d]) || any (diag (theta) != 1) || any (any (theta != theta')) || min (eig (theta)) <= 0)
        error ("copulacdf: theta must be a correlation matrix");
      endif
      if (strcmp (lower_family, "t"))
        # Student's t family: nu is mandatory.  Without this check a
        # three-argument call would only fail later with an undefined
        # variable error.
        if (nargin < 4)
          error ("copulacdf: nu (degrees of freedom) must be given for the Student's t family");
        endif
        if (! isscalar (nu) && (! isvector (nu) || length (nu) != n))
          error ("copulacdf: nu must be a vector with the same number of rows as x or be scalar");
        endif
        nu = nu(:);
      endif

    case {"clayton", "gumbel", "frank", "amh"}
      # Archimedian one parameter family
      if (! isvector (theta) || (! isscalar (theta) && length (theta) != n))
        error ("copulacdf: theta must be a vector with the same number of rows as x or be scalar");
      endif
      theta = theta(:);
      if (n > 1 && isscalar (theta))
        # One parameter per observation
        theta = repmat (theta, n, 1);
      endif

    case {"fgm"}
      # Exponential number of parameters
      if (! ismatrix (theta) || size (theta, 2) != (2 .^ d - d - 1) || (size (theta, 1) != 1 && size (theta, 1) != n))
        error ("copulacdf: theta must be a row vector of length 2^d-d-1 or a matrix of size n x (2^d-d-1)");
      endif
      if (n > 1 && size (theta, 1) == 1)
        # A single row of coefficients is used for every observation
        theta = repmat (theta, n, 1);
      endif

    otherwise
      error ("copulacdf: unknown copula family '%s'", family);

  endswitch

  if (n == 0)
    # Input is empty
    p = zeros (0, 1);
  else
    # Truncate input to unit hypercube
    x(x < 0) = 0;
    x(x > 1) = 1;

    # Compute the cumulative distribution function according to family.
    # Each branch also sets k, the observations whose parameters are out of
    # bounds; those are set to NaN after the switch.
    switch (lower_family)

      case {"gaussian"}
        # The Gaussian family
        p = mvncdf (norminv (x), zeros (1, d), theta);
        # No parameter bounds check
        k = [];

      case {"t"}
        # The Student's t family
        p = mvtcdf (tinv (x, nu), theta, nu);
        # No parameter bounds check
        k = [];

      case {"clayton"}
        # The Clayton family
        p = exp (-log (max (sum (x .^ (repmat (-theta, 1, d)), 2) - d + 1, 0)) ./ theta);
        # Product copula at columns where theta == 0
        k = find (theta == 0);
        if (any (k))
          p(k) = prod (x(k, :), 2);
        endif
        # Check bounds
        if (d > 2)
          k = find (! (theta >= 0) | ! (theta < inf));
        else
          k = find (! (theta >= -1) | ! (theta < inf));
        endif

      case {"gumbel"}
        # The Gumbel-Hougaard family
        p = exp (-(sum ((-log (x)) .^ repmat (theta, 1, d), 2)) .^ (1 ./ theta));
        # Check bounds
        k = find (! (theta >= 1) | ! (theta < inf));

      case {"frank"}
        # The Frank family
        p = -log (1 + (prod (expm1 (repmat (-theta, 1, d) .* x), 2)) ./ (expm1 (-theta) .^ (d - 1))) ./ theta;
        # Product copula at columns where theta == 0
        k = find (theta == 0);
        if (any (k))
          p(k) = prod (x(k, :), 2);
        endif
        # Check bounds
        if (d > 2)
          k = find (! (theta > 0) | ! (theta < inf));
        else
          k = find (! (theta > -inf) | ! (theta < inf));
        endif

      case {"amh"}
        # The Ali-Mikhail-Haq family
        p = (theta - 1) ./ (theta - prod ((1 + repmat (theta, 1, d) .* (x - 1)) ./ x, 2));
        # Check bounds
        if (d > 2)
          k = find (! (theta >= 0) | ! (theta < 1));
        else
          k = find (! (theta >= -1) | ! (theta < 1));
        endif

      case {"fgm"}
        # The Farlie-Gumbel-Morgenstern family
        # All binary combinations
        bcomb = logical (floor (mod (((0:(2 .^ d - 1))' * 2 .^ ((1 - d):0)), 2)));
        ecomb = ones (size (bcomb));
        ecomb(bcomb) = -1;
        # Summation over all combinations of order >= 2
        bcomb = bcomb(sum (bcomb, 2) >= 2, end:-1:1);
        # Linear constraints matrix
        ac = zeros (size (ecomb, 1), size (bcomb, 1));
        # Matrix to compute p
        ap = zeros (size (x, 1), size (bcomb, 1));
        for i = 1:size (bcomb, 1)
          ac(:, i) = -prod (ecomb(:, bcomb(i, :)), 2);
          ap(:, i) = prod (1 - x(:, bcomb(i, :)), 2);
        endfor
        p = prod (x, 2) .* (1 + sum (ap .* theta, 2));
        # Check linear constraints
        k = false (n, 1);
        for i = 1:n
          k(i) = any (ac * theta(i, :)' > 1);
        endfor

    endswitch

    # Out of bounds parameters
    if (any (k))
      p(k) = NaN;
    endif

  endif

endfunction

%!test
%! x = [0.2:0.2:0.6; 0.2:0.2:0.6];
%! theta = [1; 2];
%! p = copulacdf ("Clayton", x, theta);
%! expected_p = [0.1395; 0.1767];
%! assert (p, expected_p, 0.001);

%!test
%! x = [0.2:0.2:0.6; 0.2:0.2:0.6];
%! p = copulacdf ("Gumbel", x, 2);
%! expected_p = [0.1464; 0.1464];
%! assert (p, expected_p, 0.001);

%!test
%! x = [0.2:0.2:0.6; 0.2:0.2:0.6];
%! theta = [1; 2];
%! p = copulacdf ("Frank", x, theta);
%! expected_p = [0.0699; 0.0930];
%! assert (p, expected_p, 0.001);

%!test
%! x = [0.2:0.2:0.6; 0.2:0.2:0.6];
%! theta = [0.3; 0.7];
%! p = copulacdf ("AMH", x, theta);
%! expected_p = [0.0629; 0.0959];
%! assert (p, expected_p, 0.001);

%!test
%! x = [0.2:0.2:0.6; 0.2:0.1:0.4];
%! theta = [0.2, 0.1, 0.1, 0.05];
%! p = copulacdf ("FGM", x, theta);
%! expected_p = [0.0558; 0.0293];
%! 
assert (p, expected_p, 0.001); statistics-1.3.0/inst/copulapdf.m0000755000000000000000000001372512776476211015173 0ustar 00000000000000## Copyright (C) 2008 Arno Onken ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{p} =} copulapdf (@var{family}, @var{x}, @var{theta}) ## Compute the probability density function of a copula family. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{family} is the copula family name. Currently, @var{family} can ## be @code{'Clayton'} for the Clayton family, @code{'Gumbel'} for the ## Gumbel-Hougaard family, @code{'Frank'} for the Frank family, or ## @code{'AMH'} for the Ali-Mikhail-Haq family. ## ## @item ## @var{x} is the support where each row corresponds to an observation. ## ## @item ## @var{theta} is the parameter of the copula. The elements of ## @var{theta} must be greater than or equal to @code{-1} for the ## Clayton family, greater than or equal to @code{1} for the ## Gumbel-Hougaard family, arbitrary for the Frank family, and greater ## than or equal to @code{-1} and lower than @code{1} for the ## Ali-Mikhail-Haq family. Moreover, @var{theta} must be non-negative ## for dimensions greater than @code{2}. @var{theta} must be a column ## vector with the same number of rows as @var{x} or be scalar. 
## @end itemize
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{p} is the probability density of the copula at each row of
## @var{x} and corresponding parameter @var{theta}.
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## x = [0.2:0.2:0.6; 0.2:0.2:0.6];
## theta = [1; 2];
## p = copulapdf ("Clayton", x, theta)
## @end group
##
## @group
## p = copulapdf ("Gumbel", x, 2)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Roger B. Nelsen. @cite{An Introduction to Copulas}. Springer,
## New York, second edition, 2006.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: PDF of a copula family

function p = copulapdf (family, x, theta)

  # Validate the call
  if (nargin != 3)
    print_usage ();
  endif

  if (! ischar (family))
    error ("copulapdf: family must be one of 'Clayton', 'Gumbel', 'Frank', and 'AMH'");
  endif

  if (! (isempty (x) || ismatrix (x)))
    error ("copulapdf: x must be a numeric matrix");
  endif

  [n, d] = size (x);

  # theta is either a scalar or carries one row per observation
  theta_shape_ok = isvector (theta) && (isscalar (theta) || size (theta, 1) == n);
  if (! theta_shape_ok)
    error ("copulapdf: theta must be a column vector with the same number of rows as x or be scalar");
  endif

  # An empty support yields an empty density
  if (n == 0)
    p = zeros (0, 1);
    return;
  endif

  # Expand a scalar parameter to one value per observation
  if (n > 1 && isscalar (theta))
    theta = repmat (theta, n, 1);
  endif

  # Clip the support to the unit hypercube
  x(x < 0) = 0;
  x(x > 1) = 1;

  # Evaluate the probability density of the requested family.  Each branch
  # also collects in "invalid" the rows whose parameter is out of range.
  switch (lower (family))

    case "clayton"
      # The Clayton family
      theta_cols = repmat (theta, 1, d);
      log_cdf = -log (max (sum (x .^ (-theta_cols), 2) - d + 1, 0)) ./ theta;
      scale = prod (theta_cols .* repmat (0:(d - 1), n, 1) + 1, 2);
      p = scale .* exp ((1 + theta .* d) .* log_cdf - (theta + 1) .* sum (log (x), 2));

      # theta == 0 is the product copula, whose density is 1
      independent = find (theta == 0);
      if (! isempty (independent))
        p(independent) = 1;
      endif

      # Negative parameters are only admissible in the bivariate case
      if (d > 2)
        lower_bound = 0;
      else
        lower_bound = -1;
      endif
      invalid = find (! (theta >= lower_bound) | ! (theta < inf));

    case "gumbel"
      # The Gumbel-Hougaard family
      neg_log_x = -log (x);
      g = sum (neg_log_x .^ repmat (theta, 1, d), 2);
      c = exp (-g .^ (1 ./ theta));
      p = ((prod (neg_log_x, 2)) .^ (theta - 1)) ./ prod (x, 2) .* c .* (g .^ (2 ./ theta - 2) + (theta - 1) .* g .^ (1 ./ theta - 2));

      invalid = find (! (theta >= 1) | ! (theta < inf));

    case "frank"
      # The Frank family
      if (d != 2)
        error ("copulapdf: Frank copula PDF implemented as bivariate only");
      endif
      row_sum = sum (x, 2);
      numer = theta .* exp (theta .* (1 + row_sum)) .* (exp (theta) - 1);
      denom = exp (theta) - exp (theta + theta .* x(:, 1)) + exp (theta .* row_sum) - exp (theta + theta .* x(:, 2));
      p = numer ./ denom .^ 2;

      # theta == 0 is the product copula, whose density is 1
      independent = find (theta == 0);
      if (! isempty (independent))
        p(independent) = 1;
      endif

      invalid = find (! (theta > -inf) | ! (theta < inf));

    case "amh"
      # The Ali-Mikhail-Haq family
      if (d != 2)
        error ("copulapdf: Ali-Mikhail-Haq copula PDF implemented as bivariate only");
      endif
      z = theta .* prod (x - 1, 2) - 1;
      p = (theta .* (1 - sum (x, 2) - prod (x, 2) - z) - 1) ./ (z .^ 3);

      invalid = find (! (theta >= -1) | ! (theta < 1));

    otherwise
      error ("copulapdf: unknown copula family '%s'", family);

  endswitch

  # Out of range parameters give NaN
  if (! isempty (invalid))
    p(invalid) = NaN;
  endif

endfunction

%!test
%! x = [0.2:0.2:0.6; 0.2:0.2:0.6];
%! theta = [1; 2];
%! p = copulapdf ("Clayton", x, theta);
%! expected_p = [0.9872; 0.7295];
%! assert (p, expected_p, 0.001);

%!test
%! x = [0.2:0.2:0.6; 0.2:0.2:0.6];
%! p = copulapdf ("Gumbel", x, 2);
%! expected_p = [0.9468; 0.9468];
%! assert (p, expected_p, 0.001);

%!test
%! x = [0.2, 0.6; 0.2, 0.6];
%! theta = [1; 2];
%! p = copulapdf ("Frank", x, theta);
%! expected_p = [0.9378; 0.8678];
%! assert (p, expected_p, 0.001);

%!test
%! x = [0.2, 0.6; 0.2, 0.6];
%! theta = [0.3; 0.7];
%! p = copulapdf ("AMH", x, theta);
%! expected_p = [0.9540; 0.8577];
%!
assert (p, expected_p, 0.001); statistics-1.3.0/inst/copularnd.m0000755000000000000000000001762412776476211015207 0ustar 00000000000000## Copyright (C) 2012 Arno Onken ## ## This program is free software: you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation, either version 3 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program. If not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{x} =} copularnd (@var{family}, @var{theta}, @var{n}) ## @deftypefnx {Function File} {} copularnd (@var{family}, @var{theta}, @var{n}, @var{d}) ## @deftypefnx {Function File} {} copularnd ('t', @var{theta}, @var{nu}, @var{n}) ## Generate random samples from a copula family. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{family} is the copula family name. Currently, @var{family} can be ## @code{'Gaussian'} for the Gaussian family, @code{'t'} for the Student's t ## family, or @code{'Clayton'} for the Clayton family. ## ## @item ## @var{theta} is the parameter of the copula. For the Gaussian and Student's t ## copula, @var{theta} must be a correlation matrix. For bivariate copulas ## @var{theta} can also be a correlation coefficient. For the Clayton family, ## @var{theta} must be a vector with the same number of elements as samples to ## be generated or be scalar. ## ## @item ## @var{nu} is the degrees of freedom for the Student's t family. @var{nu} must ## be a vector with the same number of elements as samples to be generated or ## be scalar. ## ## @item ## @var{n} is the number of rows of the matrix to be generated. 
@var{n} must be
## a non-negative integer and corresponds to the number of samples to be
## generated.
##
## @item
## @var{d} is the number of columns of the matrix to be generated.  @var{d} must
## be a positive integer and corresponds to the dimension of the copula.
## @end itemize
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{x} is a matrix of random samples from the copula with @var{n} samples
## of distribution dimension @var{d}.
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## theta = 0.5;
## x = copularnd ("Gaussian", theta);
## @end group
##
## @group
## theta = 0.5;
## nu = 2;
## x = copularnd ("t", theta, nu);
## @end group
##
## @group
## theta = 0.5;
## n = 2;
## x = copularnd ("Clayton", theta, n);
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Roger B. Nelsen. @cite{An Introduction to Copulas}. Springer, New York,
## second edition, 2006.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Random samples from a copula family

function x = copularnd (family, theta, nu, n)

  # The meaning of the third and fourth argument depends on the family:
  #   Gaussian, Clayton:  copularnd (family, theta, n, d)
  #   Student's t:        copularnd ("t", theta, nu, n)

  # Check arguments
  if (nargin < 2)
    print_usage ();
  endif

  if (! ischar (family))
    error ("copularnd: family must be one of 'Gaussian', 't', and 'Clayton'");
  endif

  lower_family = lower (family);

  # Check family and copula parameters
  switch (lower_family)

    case {"gaussian"}
      # Gaussian family
      if (isscalar (theta))
        # Expand a scalar to a correlation matrix
        theta = [1, theta; theta, 1];
      endif
      if (! ismatrix (theta) || any (diag (theta) != 1) || any (any (theta != theta')) || min (eig (theta)) <= 0)
        error ("copularnd: theta must be a correlation matrix");
      endif
      if (nargin > 3)
        # Fourth argument is the dimension for this family
        d = n;
        if (! isscalar (d) || d != size (theta, 1))
          error ("copularnd: d must correspond to dimension of theta");
        endif
      else
        d = size (theta, 1);
      endif
      if (nargin < 3)
        n = 1;
      else
        # Third argument is the number of samples for this family
        n = nu;
        if (! isscalar (n) || (n < 0) || round (n) != n)
          error ("copularnd: n must be a non-negative integer");
        endif
      endif

    case {"t"}
      # Student's t family
      if (nargin < 3)
        print_usage ();
      endif
      if (isscalar (theta))
        # Expand a scalar to a correlation matrix
        theta = [1, theta; theta, 1];
      endif
      if (! ismatrix (theta) || any (diag (theta) != 1) || any (any (theta != theta')) || min (eig (theta)) <= 0)
        error ("copularnd: theta must be a correlation matrix");
      endif
      # The dimension follows from the correlation matrix; it is needed
      # below for the empty result and for expanding nu.
      d = size (theta, 1);
      # Settle n before it is used to validate the length of nu
      if (nargin < 4)
        n = 1;
      else
        if (! isscalar (n) || (n < 0) || round (n) != n)
          error ("copularnd: n must be a non-negative integer");
        endif
      endif
      if (! isscalar (nu) && (! isvector (nu) || length (nu) != n))
        error ("copularnd: nu must be a vector with the same number of rows as x or be scalar");
      endif
      nu = nu(:);

    case {"clayton"}
      # Archimedean one parameter family
      if (nargin < 4)
        # Default is bivariate
        d = 2;
      else
        d = n;
        if (! isscalar (d) || (d < 2) || round (d) != d)
          error ("copularnd: d must be an integer greater than 1");
        endif
      endif
      if (nargin < 3)
        # Default is one sample
        n = 1;
      else
        n = nu;
        if (! isscalar (n) || (n < 0) || round (n) != n)
          error ("copularnd: n must be a non-negative integer");
        endif
      endif
      if (! isvector (theta) || (! isscalar (theta) && size (theta, 1) != n))
        error ("copularnd: theta must be a column vector with the number of rows equal to n or be scalar");
      endif
      if (n > 1 && isscalar (theta))
        theta = repmat (theta, n, 1);
      endif

    otherwise
      error ("copularnd: unknown copula family '%s'", family);

  endswitch

  if (n == 0)
    # Input is empty
    x = zeros (0, d);
  else

    # Draw random samples according to family
    switch (lower_family)

      case {"gaussian"}
        # The Gaussian family
        x = normcdf (mvnrnd (zeros (1, d), theta, n), 0, 1);
        # No parameter bounds check
        k = [];

      case {"t"}
        # The Student's t family
        t_samples = mvtrnd (theta, nu, n);
        if (isscalar (nu))
          x = tcdf (t_samples, nu);
        else
          # One degrees-of-freedom value per sample: replicate it across
          # the d columns so both tcdf arguments have a common size
          x = tcdf (t_samples, repmat (nu, 1, d));
        endif
        # No parameter bounds check
        k = [];

      case {"clayton"}
        # The Clayton family
        u = rand (n, d);
        if (d == 2)
          x = zeros (n, 2);
          # Conditional distribution method for the bivariate case which also
          # works for theta < 0
          x(:, 1) = u(:, 1);
          x(:, 2) = (1 + u(:, 1) .^ (-theta) .* (u(:, 2) .^ (-theta ./ (1 + theta)) - 1)) .^ (-1 ./ theta);
        else
          # Apply the algorithm by Marshall and Olkin:
          # Frailty distribution for Clayton copula is gamma
          y = randg (1 ./ theta, n, 1);
          x = (1 - log (u) ./ repmat (y, 1, d)) .^ (-1 ./ repmat (theta, 1, d));
        endif
        k = find (theta == 0);
        if (any (k))
          # Product copula at rows k
          x(k, :) = u(k, :);
        endif
        # Continue argument check
        if (d == 2)
          k = find (! (theta >= -1) | ! (theta < inf));
        else
          k = find (! (theta >= 0) | ! (theta < inf));
        endif

    endswitch

    # Out of bounds parameters
    if (any (k))
      x(k, :) = NaN;
    endif

  endif

endfunction

%!test
%! theta = 0.5;
%! x = copularnd ("Gaussian", theta);
%! assert (size (x), [1, 2]);
%! assert (all ((x >= 0) & (x <= 1)));

%!test
%! theta = 0.5;
%! nu = 2;
%! x = copularnd ("t", theta, nu);
%! assert (size (x), [1, 2]);
%! assert (all ((x >= 0) & (x <= 1)));

%!test
%! theta = 0.5;
%! x = copularnd ("Clayton", theta);
%! assert (size (x), [1, 2]);
%! assert (all ((x >= 0) & (x <= 1)));

%!test
%! theta = 0.5;
%! n = 2;
%! x = copularnd ("Clayton", theta, n);
%! assert (size (x), [n, 2]);
%!
assert (all ((x >= 0) & (x <= 1))); %!test %! theta = [1; 2]; %! n = 2; %! d = 3; %! x = copularnd ("Clayton", theta, n, d); %! assert (size (x), [n, d]); %! assert (all ((x >= 0) & (x <= 1))); statistics-1.3.0/inst/crossval.m0000755000000000000000000001365612776476211015055 0ustar 00000000000000## Copyright (C) 2014 Nir Krakauer ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program; If not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{results} =} crossval (@var{f}, @var{X}, @var{y}[, @var{params}]) ## Perform cross validation on given data. ## ## @var{f} should be a function that takes 4 inputs @var{xtrain}, @var{ytrain}, ## @var{xtest}, @var{ytest}, fits a model based on @var{xtrain}, @var{ytrain}, ## applies the fitted model to @var{xtest}, and returns a goodness of fit ## measure based on comparing the predicted and actual @var{ytest}. ## @code{crossval} returns an array containing the values returned by @var{f} ## for every cross-validation fold or resampling applied to the given data. ## ## @var{X} should be an @var{n} by @var{m} matrix of predictor values ## ## @var{y} should be an @var{n} by @var{1} vector of predicand values ## ## @var{params} may include parameter-value pairs as follows: ## ## @table @asis ## @item @qcode{"KFold"} ## Divide set into @var{k} equal-size subsets, using each one successively ## for validation. 
## ## @item @qcode{"HoldOut"} ## Divide set into two subsets, training and validation. If the value ## @var{k} is a fraction, that is the fraction of values put in the ## validation subset (by default @var{k}=0.1); if it is a positive integer, ## that is the number of values in the validation subset. ## ## @item @qcode{"LeaveOut"} ## Leave-one-out partition (each element is placed in its own subset). ## The value is ignored. ## ## @item @qcode{"Partition"} ## The value should be a @var{cvpartition} object. ## ## @item @qcode{"Given"} ## The value should be an @var{n} by @var{1} vector specifying in which ## partition to put each element. ## ## @item @qcode{"stratify"} ## The value should be an @var{n} by @var{1} vector containing class ## designations for the elements, in which case the @qcode{"KFold"} and ## @qcode{"HoldOut"} partitionings attempt to ensure each partition ## represents the classes proportionately. ## ## @item @qcode{"mcreps"} ## The value should be a positive integer specifying the number of times ## to resample based on different partitionings. Currently only works with ## the partition type @qcode{"HoldOut"}. ## ## @end table ## ## Only one of @qcode{"KFold"}, @qcode{"HoldOut"}, @qcode{"LeaveOut"}, ## @qcode{"Given"}, @qcode{"Partition"} should be specified. If none is ## specified, the default is @qcode{"KFold"} with @var{k} = 10. 
##
## @seealso{cvpartition}
## @end deftypefn

## Author: Nir Krakauer

function results = crossval (f, X, y, varargin)

  ## Number of observations; y must supply one value per row of X
  n = size (X, 1);

  if (numel (y) != n)
    error('X, y sizes incompatible')
  endif

  ## Collect the optional name/value pairs in a struct.  Names are
  ## normalised by validatestring; a trailing name without a value is
  ## ignored, and a repeated name keeps its last value.
  opts = struct ();
  if (numel (varargin) > 1)
    validnames = {'KFold', 'HoldOut', 'LeaveOut', 'Partition', 'Given', 'stratify', 'mcreps'};
    for i = 1:floor (numel (varargin) / 2)
      name = validatestring (varargin{2*i - 1}, validnames);
      opts.(name) = varargin{2*i};
    endfor
  endif

  ## First argument handed to cvpartition for the KFold and HoldOut
  ## partitions: the "stratify" value when given, otherwise the number
  ## of observations
  if (isfield (opts, "stratify"))
    strata = opts.stratify;
  else
    strata = n;
  endif

  ## Construct the CV partition.  The order of the tests decides which
  ## option wins when several partition types are supplied.
  if (isfield (opts, "Partition"))
    P = opts.Partition;
  elseif (isfield (opts, "Given"))
    P = cvpartition (opts.Given, "Given");
  elseif (isfield (opts, "KFold"))
    P = cvpartition (strata, "KFold", opts.KFold);
  elseif (isfield (opts, "HoldOut"))
    P = cvpartition (strata, "HoldOut", opts.HoldOut);
  elseif (isfield (opts, "LeaveOut"))
    P = cvpartition (n, "LeaveOut");
  else
    ## Default: KFold with the default number of folds of cvpartition
    P = cvpartition (strata, "KFold");
  endif

  nr = get (P, "NumTestSets"); # number of test sets to do cross validation on

  ## Monte-Carlo repetitions only apply to hold-out partitions
  nreps = 1;
  if (strcmp (get (P, "Type"), 'holdout') && isfield (opts, "mcreps") ...
      && ! isempty (opts.mcreps) && opts.mcreps > 1)
    nreps = opts.mcreps;
  endif

  ## One row per repetition, one column per test set
  results = nan (nreps, nr);
  for rep = 1:nreps
    if (rep > 1)
      ## Draw a fresh partition for every repetition after the first
      P = repartition (P);
    endif
    for i = 1:nr
      inds_train = training (P, i);
      inds_test = test (P, i);
      results(rep, i) = f (X(inds_train, :), y(inds_train), X(inds_test, :), y(inds_test));
    endfor
  endfor

endfunction

%!test
%! load fisheriris.txt
%! y = fisheriris(:, 2);
%! X = [ones(size(y)) fisheriris(:, 3:5)];
%! f = @(X1, y1, X2, y2) meansq (y2 - X2*regress(y1, X1));
%! results0 = crossval (f, X, y);
%!
results1 = crossval (f, X, y, 'KFold', 10); %! folds = 5; %! results2 = crossval (f, X, y, 'KFold', folds); %! results3 = crossval (f, X, y, 'Partition', cvpartition (numel (y), 'KFold', folds)); %! results4 = crossval (f, X, y, 'LeaveOut', 1); %! mcreps = 2; n_holdout = 20; %! results5 = crossval (f, X, y, 'HoldOut', n_holdout, 'mcreps', mcreps); %! %! ## ensure equal representation of iris species in the training set -- tends %! ## to slightly reduce cross-validation mean square error %! results6 = crossval (f, X, y, 'KFold', 5, 'stratify', fisheriris(:, 1)); %! %! assert (results0, results1); %! assert (results2, results3); %! assert (size(results4), [1 numel(y)]); %! assert (mean(results4), 4.5304, 1E-4); %! assert (size(results5), [mcreps 1]); statistics-1.3.0/inst/dcov.m0000755000000000000000000000455112776476211014146 0ustar 00000000000000## Copyright (C) 2014 - Maria L. Rizzo and Gabor J. Szekely ## Copyright (C) 2014 Juan Pablo Carbajal ## This work is derived from the R energy package. It was adapted ## for Octave by Juan Pablo Carbajal. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program. If not, see . ## Author: Juan Pablo Carbajal ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{dCor}, @var{dCov}, @var{dVarX}, @var{dVarY}] =} dcov (@var{x}, @var{y}, @var{index}=1) ## Distance covariance and correlation statistics. 
##
## It returns distance correlation (@var{dCor}),
## distance covariance (@var{dCov}), distance variance on x (@var{dVarX}) and
## distance variance on y (@var{dVarY}).
##
## Reference: https://en.wikipedia.org/wiki/Distance_correlation
##
## @seealso{cov}
## @end deftypefn

function [dCov, dCor, dVarX, dVarY] = dcov (x,y,index=1.0)

  ## NOTE: the outputs are returned in the order dCov, dCor, dVarX, dVarY.

  ## Pairwise distances |x_k - x_l| and |y_k - y_l|.  The absolute value
  ## is essential: signed differences are not distances, and raising a
  ## negative difference to a fractional index would give complex values.
  x = abs (bsxfun (@minus, x, x.'));
  y = abs (bsxfun (@minus, y, y.'));

  n = size (x, 1);
  m = size (y, 1);
  if (n != m)
    error ("Octave:invalid-input-arg", "Sample sizes must agree.");
  endif

  ## Inf - Inf is NaN, so infinite inputs are caught by this test as well
  if (any (isnan (x(:))) || any (isnan (y(:))))
    error ("Octave:invalid-input-arg","Data contains missing or infinite values.");
  endif

  if (index < 0 || index > 2)
    warning ("Octave:invalid-input-arg","index must be in [0,2), using default index=1");
    index = 1.0;
  endif

  ## Double centred distance matrices
  A = Akl (x, index);
  B = Akl (y, index);

  dCov = sqrt (mean (A(:) .* B(:)));
  dVarX = sqrt (mean (A(:).^2) );
  dVarY = sqrt (mean (B(:).^2) );
  V = sqrt (dVarX .* dVarY);

  ## The correlation is defined as zero when either distance variance vanishes
  if (V > 0)
    dCor = dCov / V;
  else
    dCor = 0;
  endif

endfunction

## Double centring of a distance matrix raised to the power index:
## c(k,l) = d(k,l) - rowmean(k) - rowmean(l) + grandmean.
## Using the row means for both terms assumes x is symmetric, which holds
## for the absolute-difference matrices built in dcov.
function c = Akl (x, index)
  d = x .^ index;
  m = mean (d, 2);
  M = mean (d(:));
  c = d - bsxfun (@plus, m, m.') + M;
endfunction
statistics-1.3.0/inst/dendrogram.m0000755000000000000000000000641612776476211015337 0ustar 00000000000000%% Copyright (c) 2012 Juan Pablo Carbajal
%%
%% This program is free software: you can redistribute it and/or modify
%% it under the terms of the GNU General Public License as published by
%% the Free Software Foundation, either version 3 of the License, or
%% any later version.
%%
%% This program is distributed in the hope that it will be useful,
%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
%% GNU General Public License for more details.
%%
%% You should have received a copy of the GNU General Public License
%% along with this program. If not, see .
%% -*- texinfo -*-
%% @deftypefn {Function File} {@var{p} = } dendrogram (@var{tree})
%% @deftypefnx {Function File} {@var{p, t} = } dendrogram (@var{tree})
%% @deftypefnx {Function File} {@var{p, t, perm} = } dendrogram (@var{tree})
%% Plots a dendrogram using the output of function @command{linkage}.
%%
%% t is a vector containing the leaf node number for each object in the
%% original dataset.  For now, all objects are leaf nodes.
%%
%% perm is the permutation of the input objects used to display the
%% dendrogram, in left-to-right order.
%%
%% TODO: Return handle to lines to set properties
%% TODO: Rescale the plot automatically based on data.
%%
%% @seealso{linkage}
%% @end deftypefn

function [p, t, perm] = dendrogram (tree)

  [m d] = size (tree);
  if d != 3
    error ("Input data must be a tree as returned by function linkage.")
  end
  % A tree with m merges describes n = m + 1 original objects
  n = m + 1;

  % t is the leaf node number for all objects in the original dataset.
  % TODO: Add support for collapsing the tree.
  % For now, we always display all objects, so this is the identity map
  % over all n objects.
  t = (1:n)';

  % Largest node index that appears as a child of some merge
  nc = max(tree(:,1:2)(:));

  % Vector with the horizontal and vertical position of each cluster
  p = zeros (nc,2);
  perm = zeros (n,1);

  %% Ordering by depth-first search, starting at node nc+1 (taken as the root)
  nodecount = 0;
  nodes_to_visit = nc+1;
  while !isempty(nodes_to_visit)
    currentnode = nodes_to_visit(1);
    nodes_to_visit(1) = [];
    if currentnode > n
      % Inner node: push its two children in front of the pending nodes
      node = currentnode - n;
      nodes_to_visit = [tree(node,[2 1]) nodes_to_visit];
    end

    if currentnode <= n && p(currentnode,1) == 0
      % Leaf seen for the first time: it takes the next horizontal slot
      nodecount +=1;
      p(currentnode,1) = nodecount;
      perm(nodecount) = currentnode;
    end

  end

  % Compute the horizontal position, begin-end
  % and vertical position of all clusters.
  % Row i reads the positions of its children, so this presumably relies on
  % every merge referring only to leaves or to earlier rows -- confirm
  % against linkage.
  x = zeros (m,2);
  for i = 1:m
    p(n+i,1) = mean (p(tree(i,1:2),1));
    p(n+i,2) = tree(i,3);
    x(i,1:2) = p(tree(i,1:2),1);
  end

  figure(gcf)
  % plot horizontal lines
  line (x', tree(:,[3 3])');

  % plot vertical lines: from the height of each node up to the height
  % of the merge it takes part in
  [~,tf] = ismember (1:nc, tree(:,1:2));
  [ind,~] = ind2sub (size (tree(:,1:2)), tf);
  y = [p(1:nc,2) tree(ind,3)];
  line ([p(1:nc,1) p(1:nc,1)]',y');

  xticks = 1:n;
  xl_txt = arrayfun (@num2str, perm,"uniformoutput",false);
  set (gca,"xticklabel",xl_txt,"xtick",xticks);
  axis ([0.5 n+0.5 0 max(tree(:,3))+0.1*min(tree(:,3))]);

endfunction

%!demo
%! y = [4 5; 2 6; 3 7; 8 9; 1 10];
%! y(:,3) = 1:5;
%! figure(gcf); clf;
%! dendrogram(y);

%!demo
%! v = 2*rand(30,1)-1;
%! d = abs(bsxfun(@minus, v(:,1), v(:,1)'));
%! y = linkage (squareform(d,"tovector"));
%! figure(gcf); clf;
%! dendrogram(y);
statistics-1.3.0/inst/expstat.m0000755000000000000000000000450112776476211014676 0ustar 00000000000000## Copyright (C) 2006, 2007 Arno Onken
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see .

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{m}, @var{v}] =} expstat (@var{l})
## Compute mean and variance of the exponential distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{l} is the parameter of the exponential distribution.
The
## elements of @var{l} must be positive
## @end itemize
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{m} is the mean of the exponential distribution
##
## @item
## @var{v} is the variance of the exponential distribution
## @end itemize
##
## @subheading Example
##
## @example
## @group
## l = 1:6;
## [m, v] = expstat (l)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Moments of the exponential distribution

function [m, v] = expstat (l)

  # Validate the call
  if (nargin != 1)
    print_usage ();
  endif

  if (! (isempty (l) || ismatrix (l)))
    error ("expstat: l must be a numeric matrix");
  endif

  # Mean is l, variance is l squared
  m = l;
  v = l .^ 2;

  # Entries of l that are not positive and finite have no moments
  invalid = find (! (l > 0) | ! (l < Inf));
  if (! isempty (invalid))
    m(invalid) = NaN;
    v(invalid) = NaN;
  endif

endfunction

%!test
%! l = 1:6;
%! [m, v] = expstat (l);
%! assert (m, [1, 2, 3, 4, 5, 6], 0.001);
%! assert (v, [1, 4, 9, 16, 25, 36], 0.001);
statistics-1.3.0/inst/ff2n.m0000755000000000000000000000047612776476211014050 0ustar 00000000000000## Author: Paul Kienzle
## This program is granted to the public domain.

## -*- texinfo -*-
## @deftypefn {Function File} ff2n (@var{n})
## Full-factor design with n binary terms.
##
## @seealso {fullfact}
## @end deftypefn

function A=ff2n(n)
  # n factors with two levels each, recoded from 1/2 to 0/1
  levels = 2 * ones (1,n);
  A = fullfact (levels) - 1;
endfunction
statistics-1.3.0/inst/fisheriris.txt0000755000000000000000000000414212776476211015741 0ustar 00000000000000#Fisher iris data set
#cf.
https://en.wikipedia.org/wiki/Iris_flower_data_set #Type PW PL SW SL 0 2 14 33 50 1 24 56 31 67 1 23 51 31 69 0 2 10 36 46 1 20 52 30 65 1 19 51 27 58 2 13 45 28 57 2 16 47 33 63 1 17 45 25 49 2 14 47 32 70 0 2 16 31 48 1 19 50 25 63 0 1 14 36 49 0 2 13 32 44 2 12 40 26 58 1 18 49 27 63 2 10 33 23 50 0 2 16 38 51 0 2 16 30 50 1 21 56 28 64 0 4 19 38 51 0 2 14 30 49 2 10 41 27 58 2 15 45 29 60 0 2 14 36 50 1 19 51 27 58 0 4 15 34 54 1 18 55 31 64 2 10 33 24 49 0 2 14 42 55 1 15 50 22 60 2 14 39 27 52 0 2 14 29 44 2 12 39 27 58 1 23 57 32 69 2 15 42 30 59 1 20 49 28 56 1 18 58 25 67 2 13 44 23 63 2 15 49 25 63 2 11 30 25 51 1 21 54 31 69 1 25 61 36 72 2 13 36 29 56 1 21 55 30 68 0 1 14 30 48 0 3 17 38 57 2 14 44 30 66 0 4 15 37 51 2 17 50 30 67 1 22 56 28 64 1 15 51 28 63 2 15 45 22 62 2 14 46 30 61 2 11 39 25 56 1 23 59 32 68 1 23 54 34 62 1 25 57 33 67 0 2 13 35 55 2 15 45 32 64 1 18 51 30 59 1 23 53 32 64 2 15 45 30 54 1 21 57 33 67 0 2 13 30 44 0 2 16 32 47 1 18 60 32 72 1 18 49 30 61 0 2 12 32 50 0 1 11 30 43 2 14 44 31 67 0 2 14 35 51 0 4 16 34 50 2 10 35 26 57 1 23 61 30 77 2 13 42 26 57 0 1 15 41 52 1 18 48 30 60 2 13 42 27 56 0 2 15 31 49 0 4 17 39 54 2 16 45 34 60 2 10 35 20 50 0 2 13 32 47 2 13 54 29 62 0 2 15 34 51 2 10 50 22 60 0 1 15 31 49 0 2 15 37 54 2 12 47 28 61 2 13 41 28 57 0 4 13 39 54 1 20 51 32 65 2 15 49 31 69 2 13 40 25 55 0 3 13 23 45 0 3 15 38 51 2 14 48 28 68 0 2 15 35 52 1 25 60 33 63 2 15 46 28 65 0 3 14 34 46 2 18 48 32 59 2 16 51 27 60 1 18 55 30 65 0 5 17 33 51 1 22 67 38 77 1 21 66 30 76 1 13 52 30 67 2 13 40 28 61 2 11 38 24 55 0 2 14 34 52 1 20 64 38 79 0 6 16 35 50 1 20 67 28 77 2 12 44 26 55 0 3 14 30 48 0 2 19 34 48 1 14 56 26 61 0 2 12 40 58 1 18 48 28 62 2 15 45 30 56 0 2 14 32 46 0 4 15 44 57 1 24 56 34 63 1 16 58 30 72 1 21 59 30 71 1 18 56 29 63 2 12 42 30 57 1 23 69 26 77 2 13 56 29 66 0 2 15 34 52 2 10 37 24 55 0 2 15 31 46 1 19 61 28 74 0 3 13 35 50 1 18 63 29 73 2 15 47 31 67 2 13 41 30 56 2 13 43 29 64 1 22 58 30 65 0 
3 14 35 51 2 14 47 29 61 1 19 53 27 64 0 2 16 34 48 1 20 50 25 57 2 13 40 23 55 0 2 17 34 54 1 24 51 28 58 0 2 15 37 53 statistics-1.3.0/inst/fstat.m0000755000000000000000000000672412776476211014340 0ustar 00000000000000## Copyright (C) 2006, 2007 Arno Onken ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{mn}, @var{v}] =} fstat (@var{m}, @var{n}) ## Compute mean and variance of the F distribution. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{m} is the first parameter of the F distribution. The elements ## of @var{m} must be positive ## ## @item ## @var{n} is the second parameter of the F distribution. The ## elements of @var{n} must be positive ## @end itemize ## @var{m} and @var{n} must be of common size or one of them must be scalar ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{mn} is the mean of the F distribution. The mean is undefined for ## @var{n} not greater than 2 ## ## @item ## @var{v} is the variance of the F distribution. The variance is undefined ## for @var{n} not greater than 4 ## @end itemize ## ## @subheading Examples ## ## @example ## @group ## m = 1:6; ## n = 5:10; ## [mn, v] = fstat (m, n) ## @end group ## ## @group ## [mn, v] = fstat (m, 5) ## @end group ## @end example ## ## @subheading References ## ## @enumerate ## @item ## Wendy L. Martinez and Angel R. Martinez. 
## -*- texinfo -*-
## @deftypefn {Function File} {[@var{mn}, @var{v}] =} fstat (@var{m}, @var{n})
## Compute mean and variance of the F distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{m} is the first parameter of the F distribution. The elements
## of @var{m} must be positive
##
## @item
## @var{n} is the second parameter of the F distribution. The
## elements of @var{n} must be positive
## @end itemize
## @var{m} and @var{n} must be of common size or one of them must be scalar
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{mn} is the mean of the F distribution. The mean is undefined for
## @var{n} not greater than 2
##
## @item
## @var{v} is the variance of the F distribution. The variance is undefined
## for @var{n} not greater than 4
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## m = 1:6;
## n = 5:10;
## [mn, v] = fstat (m, n)
## @end group
##
## @group
## [mn, v] = fstat (m, 5)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Moments of the F distribution

function [mn, v] = fstat (m, n)

  ## Exactly two inputs are required.
  if (nargin != 2)
    print_usage ();
  endif

  ## Empty inputs are let through; anything else must pass ismatrix.
  if (! (isempty (m) || ismatrix (m)))
    error ("fstat: m must be a numeric matrix");
  endif
  if (! (isempty (n) || ismatrix (n)))
    error ("fstat: n must be a numeric matrix");
  endif

  ## Unless both are scalar, bring m and n to a common size.
  if (! (isscalar (m) && isscalar (n)))
    [size_err, m, n] = common_size (m, n);
    if (size_err > 0)
      error ("fstat: m and n must be of common size or scalar");
    endif
  endif

  ## Moments, evaluated everywhere first; invalid entries are masked below.
  mn = n ./ (n - 2);
  v = (2 .* (n .^ 2) .* (m + n - 2)) ./ (m .* ((n - 2) .^ 2) .* (n - 4));

  ## Entries outside 0 < m < Inf, 2 < n < Inf (NaN included) have no moments.
  has_mean = (m > 0) & (m < Inf) & (n > 2) & (n < Inf);
  mn(! has_mean) = NaN;
  v(! has_mean) = NaN;

  ## The variance additionally requires n > 4.
  v(! (n > 4)) = NaN;

endfunction

%!test
%! m = 1:6;
%! n = 5:10;
%! [mn, v] = fstat (m, n);
%! expected_mn = [1.6667, 1.5000, 1.4000, 1.3333, 1.2857, 1.2500];
%! expected_v = [22.2222, 6.7500, 3.4844, 2.2222, 1.5869, 1.2153];
%! assert (mn, expected_mn, 0.001);
%! assert (v, expected_v, 0.001);

%!test
%! m = 1:6;
%! [mn, v] = fstat (m, 5);
%! expected_mn = [1.6667, 1.6667, 1.6667, 1.6667, 1.6667, 1.6667];
%! expected_v = [22.2222, 13.8889, 11.1111, 9.7222, 8.8889, 8.3333];
%! assert (mn, expected_mn, 0.001);
%! assert (v, expected_v, 0.001);
## Author: Paul Kienzle
## This program is granted to the public domain.

## -*- texinfo -*-
## @deftypefn {Function File} fullfact (@var{N})
## Full factorial design.
##
## If @var{N} is a scalar, return the full factorial design with @var{N} binary
## choices, 0 and 1.
##
## If @var{N} is a vector, return the full factorial design with choices 1
## through @var{n_i} for each factor @var{i}.
##
## @end deftypefn

function A = fullfact(n)
  if length(n) == 1
    % combinatorial design with n either/or choices: n two-level factors,
    % shifted from levels 1..2 down to 0..1 at the end.  The level vector is
    % built here instead of recursing, because for n == 1 the vector
    % 2*ones(1,1) is itself a scalar and a recursive call would treat it as
    % "two binary factors" and subtract the offset twice.
    levels = 2*ones(1,n);
    offset = 1;
  else
    % combinatorial design with n(i) choices per level
    levels = n;
    offset = 0;
  end

  % Start with the last factor and prepend one factor at a time, so the
  % first column varies slowest and the last column varies fastest.
  A = [1:levels(end)]';
  for i=length(levels)-1:-1:1
    A = [kron([1:levels(i)]',ones(rows(A),1)), repmat(A,levels(i),1)];
  end
  A = A - offset;
endfunction

%!assert (fullfact (1), [0; 1])
%!assert (fullfact (2), [0 0; 0 1; 1 0; 1 1])
%!assert (fullfact ([2 3]), [1 1; 1 2; 1 3; 2 1; 2 2; 2 3])


## Author: Martijn van Oosterhout
## This program is granted to the public domain.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{MLE} =} gamfit (@var{data})
## Calculate gamma distribution parameters.
##
## Find the maximum likelihood estimators (@var{mle}s) of the Gamma distribution
## of @var{data}.  @var{MLE} is a two element vector with shape parameter
## @var{A} and scale @var{B}.
##
## @seealso{gampdf, gaminv, gamrnd, gamlike}
## @end deftypefn

## This function works by minimizing the value of gamlike for the vector R.
## Just about any minimization function will work, all it has to do is
## minimize for one variable. Although the gamma distribution has two
## parameters, their product is the mean of the data. so a helper function
## for the search takes one parameter, calculates the other and then returns
## the value of gamlike.

## FIXME is this still true???
## Note: Octave uses the inverse scale parameter, which is the opposite of
## Matlab. To work for Matlab, value of b needs to be inverted in a few
## places (marked with **)

function res = gamfit(R)

  if (nargin != 1)
    print_usage;
  endif

  avg = mean(R);

  # This can be just about any search function. I choose this because it
  # seemed to be the only one that might work in this situation...
  # nmsmax maximizes, and gamfit_search returns the negated gamlike value.
  a=nmsmax( @gamfit_search, 1, [], [], avg, R );

  b=a/avg;      # **

  res=[a 1/b];
endfunction
# Helper for gamfit: reduces the fit to a search over one variable.
# Given the first parameter A and the data mean AVG, the second parameter is
# derived from them and the negated gamlike value is returned, so that an
# optimiser which maximizes can be used on it.
function res = gamfit_search (a, avg, R)
  ratio = a / avg;      # **
  res = -gamlike ([a, 1 / ratio], R);
endfunction


## Author: Martijn van Oosterhout
## This program is granted to the public domain.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{X} =} gamlike ([@var{A} @var{B}], @var{R})
## Calculates the negative log-likelihood function for the Gamma
## distribution over vector @var{R}, with the given parameters @var{A} and @var{B}.
## @seealso{gampdf, gaminv, gamrnd, gamfit}
## @end deftypefn

function res = gamlike (P, K)

  if (nargin != 2)
    print_usage;
  endif

  ## P(1) and P(2) are handed to gampdf as its second and third arguments.
  log_density = log (gampdf (K, P(1), P(2)));
  res = -sum (log_density);

endfunction
## Copyright (C) 2006, 2007 Arno Onken <asnelt@asnelt.org>
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{m}, @var{v}] =} gamstat (@var{a}, @var{b})
## Compute mean and variance of the gamma distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{a} is the first parameter of the gamma distribution. @var{a} must be
## positive
##
## @item
## @var{b} is the second parameter of the gamma distribution. @var{b} must be
## positive
## @end itemize
## @var{a} and @var{b} must be of common size or one of them must be scalar
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{m} is the mean of the gamma distribution
##
## @item
## @var{v} is the variance of the gamma distribution
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## a = 1:6;
## b = 1:0.2:2;
## [m, v] = gamstat (a, b)
## @end group
##
## @group
## [m, v] = gamstat (a, 1.5)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Moments of the gamma distribution

function [m, v] = gamstat (a, b)

  ## Exactly two inputs are required.
  if (nargin != 2)
    print_usage ();
  endif

  ## Empty inputs are let through; anything else must pass ismatrix.
  if (! (isempty (a) || ismatrix (a)))
    error ("gamstat: a must be a numeric matrix");
  endif
  if (! (isempty (b) || ismatrix (b)))
    error ("gamstat: b must be a numeric matrix");
  endif

  ## Unless both are scalar, bring a and b to a common size.
  if (! (isscalar (a) && isscalar (b)))
    [size_err, a, b] = common_size (a, b);
    if (size_err > 0)
      error ("gamstat: a and b must be of common size or scalar");
    endif
  endif

  ## Moments, evaluated everywhere first; invalid entries are masked below.
  m = a .* b;
  v = a .* (b .^ 2);

  ## Both parameters must lie strictly between 0 and Inf (NaN fails too).
  valid = (a > 0) & (a < Inf) & (b > 0) & (b < Inf);
  m(! valid) = NaN;
  v(! valid) = NaN;

endfunction

%!test
%! a = 1:6;
%! b = 1:0.2:2;
%! [m, v] = gamstat (a, b);
%! expected_m = [1.00, 2.40, 4.20, 6.40, 9.00, 12.00];
%! expected_v = [1.00, 2.88, 5.88, 10.24, 16.20, 24.00];
%! assert (m, expected_m, 0.001);
%! assert (v, expected_v, 0.001);

%!test
%! a = 1:6;
%! [m, v] = gamstat (a, 1.5);
%! expected_m = [1.50, 3.00, 4.50, 6.00, 7.50, 9.00];
%! expected_v = [2.25, 4.50, 6.75, 9.00, 11.25, 13.50];
%! assert (m, expected_m, 0.001);
%! assert (v, expected_v, 0.001);
## Copyright (C) 2001 Paul Kienzle <pkienzle@users.sf.net>
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} geomean (@var{x})
## @deftypefnx{Function File} geomean (@var{x}, @var{dim})
## Compute the geometric mean.
##
## This function does the same as @code{mean (x, "g")}.
##
## @seealso{mean}
## @end deftypefn

function a = geomean(x, dim)

  ## Thin wrapper: forward to mean with the "g" option, passing DIM on only
  ## when the caller supplied it.
  switch (nargin)
    case 1
      a = mean (x, "g");
    case 2
      a = mean (x, "g", dim);
    otherwise
      print_usage;
  endswitch

endfunction
## Copyright (C) 2006, 2007 Arno Onken <asnelt@asnelt.org>
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{m}, @var{v}] =} geostat (@var{p})
## Compute mean and variance of the geometric distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{p} is the rate parameter of the geometric distribution. The
## elements of @var{p} must be probabilities
## @end itemize
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{m} is the mean of the geometric distribution
##
## @item
## @var{v} is the variance of the geometric distribution
## @end itemize
##
## @subheading Example
##
## @example
## @group
## p = 1 ./ (1:6);
## [m, v] = geostat (p)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Moments of the geometric distribution

function [m, v] = geostat (p)

  ## Exactly one input is required.
  if (nargin != 1)
    print_usage ();
  endif

  ## Empty input is let through; anything else must pass ismatrix.
  if (! (isempty (p) || ismatrix (p)))
    error ("geostat: p must be a numeric matrix");
  endif

  ## Moments, evaluated everywhere first; invalid entries are masked below.
  q = 1 - p;
  m = q ./ p;
  v = q ./ (p .^ 2);

  ## p must lie in [0, 1]; NaN fails both comparisons and is masked as well.
  is_prob = (p >= 0) & (p <= 1);
  m(! is_prob) = NaN;
  v(! is_prob) = NaN;

endfunction

%!test
%! p = 1 ./ (1:6);
%! [m, v] = geostat (p);
%! assert (m, [0, 1, 2, 3, 4, 5], 0.001);
%! assert (v, [0, 2, 6, 12, 20, 30], 0.001);
assert (v, [0, 2, 6, 12, 20, 30], 0.001); statistics-1.3.0/inst/gevcdf.m0000755000000000000000000000730012776476211014444 0ustar 00000000000000## Copyright (C) 2012 Nir Krakauer ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{p} =} gevcdf (@var{x}, @var{k}, @var{sigma}, @var{mu}) ## Compute the cumulative distribution function of the generalized extreme value (GEV) distribution. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{x} is the support. ## ## @item ## @var{k} is the shape parameter of the GEV distribution. (Also denoted gamma or xi.) ## @item ## @var{sigma} is the scale parameter of the GEV distribution. The elements ## of @var{sigma} must be positive. ## @item ## @var{mu} is the location parameter of the GEV distribution. ## @end itemize ## The inputs must be of common size, or some of them must be scalar. ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{p} is the cumulative distribution of the GEV distribution at each ## element of @var{x} and corresponding parameter values. 
## -*- texinfo -*-
## @deftypefn {Function File} {@var{p} =} gevcdf (@var{x}, @var{k}, @var{sigma}, @var{mu})
## Compute the cumulative distribution function of the generalized extreme value (GEV) distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{x} is the support.
##
## @item
## @var{k} is the shape parameter of the GEV distribution. (Also denoted gamma or xi.)
## @item
## @var{sigma} is the scale parameter of the GEV distribution. The elements
## of @var{sigma} must be positive.
## @item
## @var{mu} is the location parameter of the GEV distribution.
## @end itemize
## The inputs must be of common size, or some of them must be scalar.
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{p} is the cumulative distribution of the GEV distribution at each
## element of @var{x} and corresponding parameter values.
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## x = 0:0.5:2.5;
## sigma = 1:6;
## k = 1;
## mu = 0;
## y = gevcdf (x, k, sigma, mu)
## @end group
##
## @group
## y = gevcdf (x, k, 0.5, mu)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Rolf-Dieter Reiss and Michael Thomas. @cite{Statistical Analysis of Extreme Values with Applications to Insurance, Finance, Hydrology and Other Fields}. Chapter 1, pages 16-17, Springer, 2007.
##
## @end enumerate
## @seealso{gevfit, gevinv, gevlike, gevpdf, gevrnd, gevstat}
## @end deftypefn

## Author: Nir Krakauer <nkrakauer@ccny.cuny.edu>
## Description: CDF of the generalized extreme value distribution

function p = gevcdf (x, k, sigma, mu)

  # Check arguments
  if (nargin != 4)
    print_usage ();
  endif

  if (isempty (x) || isempty (k) || isempty (sigma) || isempty (mu) || ~ismatrix (x) || ~ismatrix (k) || ~ismatrix (sigma) || ~ismatrix (mu))
    error ("gevcdf: inputs must be a numeric matrices");
  endif

  [retval, x, k, sigma, mu] = common_size (x, k, sigma, mu);
  if (retval > 0)
    error ("gevcdf: inputs must be of common size or scalars");
  endif

  z = 1 + k .* (x - mu) ./ sigma;

  # Calculate cdf
  p = exp (-(z .^ (-1 ./ k)));

  # Where z <= 0 the point lies outside the support: the cdf is 0 below the
  # location parameter and 1 above it.
  p(z <= 0 & x < mu) = 0;
  p(z <= 0 & x > mu) = 1;

  # Use the limiting (Gumbel) formula where k is very close to zero, since
  # the general expression divides by k.  The test is reduced over ALL
  # elements: any() on a matrix works column-wise and returns a vector, and
  # an if-condition on a vector is true only when every element is true, so
  # matrix inputs with near-zero k in only some columns would otherwise skip
  # this branch.
  inds = (abs (k) < (eps^0.7));
  if (any (inds(:)))
    z = (mu(inds) - x(inds)) ./ sigma(inds);
    p(inds) = exp (-exp (z));
  endif

endfunction

%!test
%! x = 0:0.5:2.5;
%! sigma = 1:6;
%! k = 1;
%! mu = 0;
%! p = gevcdf (x, k, sigma, mu);
%! expected_p = [0.36788 0.44933 0.47237 0.48323 0.48954 0.49367];
%! assert (p, expected_p, 0.001);

%!test
%! x = -0.5:0.5:2.5;
%! sigma = 0.5;
%! k = 1;
%! mu = 0;
%! p = gevcdf (x, k, sigma, mu);
%! expected_p = [0 0.36788 0.60653 0.71653 0.77880 0.81873 0.84648];
%! assert (p, expected_p, 0.001);

%!test #check for continuity for k near 0
%! x = 1;
%! sigma = 0.5;
%! k = -0.03:0.01:0.03;
%! mu = 0;
%! p = gevcdf (x, k, sigma, mu);
%! expected_p = [0.88062 0.87820 0.87580 0.87342 0.87107 0.86874 0.86643];
%! assert (p, expected_p, 0.001);

%!test #matrix k with only some entries equal to zero
%! p = gevcdf (1, [0 1; 1 1], 1, 0);
%! assert (p, [exp(-exp(-1)) exp(-0.5); exp(-0.5) exp(-0.5)], 1e-12);
expected_p = [0.88062 0.87820 0.87580 0.87342 0.87107 0.86874 0.86643]; %! assert (p, expected_p, 0.001); statistics-1.3.0/inst/gevfit.m0000755000000000000000000000654312776476211014502 0ustar 00000000000000## Copyright (C) 2012-2016 Nir Krakauer ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see <http://www.gnu.org/licenses/>. ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{paramhat}, @var{paramci}] =} gevfit (@var{data}, @var{paramguess}) ## Find the maximum likelihood estimator (@var{paramhat}) of the generalized extreme value (GEV) distribution to fit @var{data}. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{data} is the vector of given values. ## @item ## @var{paramguess} is an initial guess for the maximum likelihood parameter vector. If not given, this defaults to @var{k}=0 and @var{sigma}, @var{mu} determined by matching the data mean and standard deviation to their expected values. ## @end itemize ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{paramhat} is the 3-parameter maximum-likelihood parameter vector [@var{k}; @var{sigma}; @var{mu}], where @var{k} is the shape parameter of the GEV distribution, @var{sigma} is the scale parameter of the GEV distribution, and @var{mu} is the location parameter of the GEV distribution. 
## -*- texinfo -*-
## @deftypefn {Function File} {[@var{paramhat}, @var{paramci}] =} gevfit (@var{data}, @var{paramguess})
## Find the maximum likelihood estimator (@var{paramhat}) of the generalized extreme value (GEV) distribution to fit @var{data}.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{data} is the vector of given values.
## @item
## @var{paramguess} is an initial guess for the maximum likelihood parameter vector. If not given, this defaults to @var{k}=0 and @var{sigma}, @var{mu} determined by matching the data mean and standard deviation to their expected values.
## @end itemize
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{paramhat} is the 3-parameter maximum-likelihood parameter vector [@var{k}; @var{sigma}; @var{mu}], where @var{k} is the shape parameter of the GEV distribution, @var{sigma} is the scale parameter of the GEV distribution, and @var{mu} is the location parameter of the GEV distribution.
## @item
## @var{paramci} has the approximate 95% confidence intervals of the parameter values based on the Fisher information matrix at the maximum-likelihood position.
##
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## data = 1:50;
## [pfit, pci] = gevfit (data);
## p1 = gevcdf(data,pfit(1),pfit(2),pfit(3));
## plot(data, p1)
## @end group
## @end example
## @seealso{gevcdf, gevinv, gevlike, gevpdf, gevrnd, gevstat}
## @end deftypefn

## Author: Nir Krakauer <nkrakauer@ccny.cuny.edu>
## Description: Maximum likelihood parameter estimation for the generalized extreme value distribution

function [paramhat, paramci] = gevfit (data, paramguess)

  # Check arguments
  if (nargin < 1)
    print_usage;
  endif

  # Default starting point: k = 0, with sigma and mu chosen so that the
  # resulting distribution matches the sample standard deviation and mean.
  if (nargin < 2) || isempty (paramguess)
    paramguess = zeros (3, 1);
    paramguess(2) = (sqrt (6) / pi) * std (data);
    # expectation involves Euler-Mascheroni constant
    # (terminated with a semicolon so the guess is not echoed to the console)
    paramguess(3) = mean (data) - 0.5772156649 * paramguess(2);
  endif

  # cost function to minimize; gevlike also supplies the gradient
  f = @(p) gevlike (p, data);

  paramhat = fminunc (f, paramguess, optimset ("GradObj", "on"));

  if (nargout > 1)
    # Third output of gevlike is the Fisher information matrix at the optimum;
    # the square roots of the diagonal of its inverse are the standard errors.
    [~, ~, ACOV] = gevlike (paramhat, data);
    param_se = sqrt (diag (inv (ACOV)));
    if (any (iscomplex (param_se)))
      warning ('gevfit: Fisher information matrix not positive definite; parameter optimization likely did not converge')
      paramci = nan (3, 2);
    else
      # param_se is a column; force the estimate into a column as well so the
      # bounds are 3-by-1 even when a row-vector paramguess made fminunc
      # return a row (row - column would broadcast to 3-by-3 and the
      # assignment into a single column would fail).
      centre = paramhat(:);
      paramci = [centre - 1.96 * param_se, centre + 1.96 * param_se];
    endif
  endif

endfunction

%!test
%! data = 1:50;
%! [pfit, pci] = gevfit (data);
%! expected_p = [-0.44 15.19 21.53]';
%! expected_pu = [-0.13 19.31 26.49]';
%! assert (pfit, expected_p, 0.1);
%! assert (pci(:, 2), expected_pu, 0.1);
assert (pci(:, 2), expected_pu, 0.1); statistics-1.3.0/inst/gevfit_lmom.m0000755000000000000000000000676212776476211015531 0ustar 00000000000000## Copyright (C) 2012 Nir Krakauer ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see <http://www.gnu.org/licenses/>. ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{paramhat}, @var{paramci}] =} gevfit_lmom (@var{data}) ## Find an estimator (@var{paramhat}) of the generalized extreme value (GEV) distribution fitting @var{data} using the method of L-moments. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{data} is the vector of given values. ## @end itemize ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{paramhat} is the 3-parameter L-moment estimate of the parameter vector [@var{k}; @var{sigma}; @var{mu}], where @var{k} is the shape parameter of the GEV distribution, @var{sigma} is the scale parameter of the GEV distribution, and @var{mu} is the location parameter of the GEV distribution. ## @item ## @var{paramci} has the approximate 95% confidence intervals of the parameter values (currently not implemented). 
## -*- texinfo -*-
## @deftypefn {Function File} {[@var{paramhat}, @var{paramci}] =} gevfit_lmom (@var{data})
## Find an estimator (@var{paramhat}) of the generalized extreme value (GEV) distribution fitting @var{data} using the method of L-moments.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{data} is the vector of given values.
## @end itemize
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{paramhat} is the 3-parameter estimate [@var{k}; @var{sigma}; @var{mu}], where @var{k} is the shape parameter of the GEV distribution, @var{sigma} is the scale parameter of the GEV distribution, and @var{mu} is the location parameter of the GEV distribution.
## @item
## @var{paramci} has the approximate 95% confidence intervals of the parameter values (currently not implemented).
##
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## data = gevrnd (0.1, 1, 0, 100, 1);
## [pfit, pci] = gevfit_lmom (data);
## p1 = gevcdf (data,pfit(1),pfit(2),pfit(3));
## [f, x] = ecdf (data);
## plot(data, p1, 's', x, f)
## @end group
## @end example
## @seealso{gevfit}
## @subheading References
##
## @enumerate
## @item
## Ailliot, P.; Thompson, C. & Thomson, P. Mixed methods for fitting the GEV distribution, Water Resources Research, 2011, 47, W05551
##
## @end enumerate
## @end deftypefn

## Author: Nir Krakauer <nkrakauer@ccny.cuny.edu>
## Description: L-moments parameter estimation for the generalized extreme value distribution

function [paramhat, paramci] = gevfit_lmom (data)

  # Check arguments
  if (nargin < 1)
    print_usage;
  endif

  # Sample L-moments, computed from the sorted data laid out as a row
  data = sort (data(:))';
  n = numel (data);
  rank = 1:n;

  L1 = mean (data);
  L2 = sum (data .* (2*rank - n - 1)) / (2*nchoosek (n, 2));
  b = bincoeff (rank - 1, 2);
  L3 = sum (data .* (b - 2 * (rank - 1) .* (n - rank) + fliplr (b))) / (3*nchoosek (n, 3));

  # Match the L-moments to the GEV distribution.
  # The shape k is the root of an equation that involves only L3/L2.
  target = (L3/L2 + 3)/2;
  shape_eq = @(k) target - limdiv ((1 - 3^(k)), (1 - 2^(k)));
  k = fzero (shape_eq, 0);

  # With k fixed, sigma and mu follow in closed form
  if (abs (k) < 1E-8)
    # limiting expressions for k --> 0
    eg = 0.57721566490153286; %Euler-Mascheroni constant
    sigma = L2 / log (2);
    mu = L1 - sigma * eg;
  else
    g = gamma (1 - k);
    sigma = -k*L2 / (g * (1 - 2^(k)));
    mu = L1 - sigma * ((g - 1) / k);
  endif

  paramhat = [k; sigma; mu];

  # Confidence intervals are not implemented; a NaN placeholder is returned
  if (nargout > 1)
    paramci = NaN;
  endif

endfunction

#internal function to accurately evaluate (1 - 3^k)/(1 - 2^k) in the limit as k --> 0
function c = limdiv (a, b)
  # start from the limiting value and replace it whenever the denominator is
  # not tiny
  c = log (3)/log (2);
  if (! (abs (b) < 1E-8))
    c = a / b;
  endif
endfunction

%!test
%! data = 1:50;
%! [pfit, pci] = gevfit_lmom (data);
%! expected_p = [-0.28 15.01 20.22]';
%! assert (pfit, expected_p, 0.1);
assert (pfit, expected_p, 0.1); statistics-1.3.0/inst/gevinv.m0000755000000000000000000000621212776476211014505 0ustar 00000000000000## Copyright (C) 2012 Nir Krakauer ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{X} =} gevinv (@var{P}, @var{k}, @var{sigma}, @var{mu}) ## Compute a desired quantile (inverse CDF) of the generalized extreme value (GEV) distribution. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{P} is the desired quantile of the GEV distribution. (Between 0 and 1.) ## @item ## @var{k} is the shape parameter of the GEV distribution. (Also denoted gamma or xi.) ## @item ## @var{sigma} is the scale parameter of the GEV distribution. The elements ## of @var{sigma} must be positive. ## @item ## @var{mu} is the location parameter of the GEV distribution. ## @end itemize ## The inputs must be of common size, or some of them must be scalar. ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{X} is the value corresponding to each quantile of the GEV distribution ## @end itemize ## @subheading References ## ## @enumerate ## @item ## Rolf-Dieter Reiss and Michael Thomas. @cite{Statistical Analysis of Extreme Values with Applications to Insurance, Finance, Hydrology and Other Fields}. Chapter 1, pages 16-17, Springer, 2007. ## @item ## J. R. M. Hosking (2012). @cite{L-moments}. R package, version 1.6. 
## -*- texinfo -*-
## @deftypefn {Function File} {@var{X} =} gevinv (@var{P}, @var{k}, @var{sigma}, @var{mu})
## Compute a desired quantile (inverse CDF) of the generalized extreme value (GEV) distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{P} is the desired quantile of the GEV distribution. (Between 0 and 1.)
## @item
## @var{k} is the shape parameter of the GEV distribution. (Also denoted gamma or xi.)
## @item
## @var{sigma} is the scale parameter of the GEV distribution. The elements
## of @var{sigma} must be positive.
## @item
## @var{mu} is the location parameter of the GEV distribution.
## @end itemize
## The inputs must be of common size, or some of them must be scalar.
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{X} is the value corresponding to each quantile of the GEV distribution
## @end itemize
## @subheading References
##
## @enumerate
## @item
## Rolf-Dieter Reiss and Michael Thomas. @cite{Statistical Analysis of Extreme Values with Applications to Insurance, Finance, Hydrology and Other Fields}. Chapter 1, pages 16-17, Springer, 2007.
## @item
## J. R. M. Hosking (2012). @cite{L-moments}. R package, version 1.6. URL: http://CRAN.R-project.org/package=lmom.
##
## @end enumerate
## @seealso{gevcdf, gevfit, gevlike, gevpdf, gevrnd, gevstat}
## @end deftypefn

## Author: Nir Krakauer <nkrakauer@ccny.cuny.edu>
## Description: Inverse CDF of the generalized extreme value distribution

function [X] = gevinv (P, k = 0, sigma = 1, mu = 0)

  [retval, P, k, sigma, mu] = common_size (P, k, sigma, mu);
  if (retval > 0)
    error ("gevinv: inputs must be of common size or scalars");
  endif

  X = P;

  llP = log (-log (P));
  kllP = k .* llP;

  # The quantile is mu + sigma .* (exp (-u) - 1) ./ k with u = k .* llP.
  # For small u, use the Taylor series expansion of the exponential to avoid
  # roundoff error or dividing by zero when k is small:
  #   (exp (-u) - 1) ./ k = -llP .* (1 - u/2 + u.^2/6 - ...)
  ii = (abs (kllP) < 1E-4);
  X(ii) = mu(ii) - sigma(ii) .* llP(ii) .* (1 - kllP(ii) .* (1/2 - kllP(ii) / 6));
  X(~ii) = mu(~ii) + (sigma(~ii) ./ k(~ii)) .* (exp (-kllP(~ii)) - 1);

endfunction

%!test
%! p = 0.1:0.1:0.9;
%! k = 0;
%! sigma = 1;
%! mu = 0;
%! x = gevinv (p, k, sigma, mu);
%! c = gevcdf(x, k, sigma, mu);
%! assert (c, p, 0.001);

%!test
%! p = 0.1:0.1:0.9;
%! k = 1;
%! sigma = 1;
%! mu = 0;
%! x = gevinv (p, k, sigma, mu);
%! c = gevcdf(x, k, sigma, mu);
%! assert (c, p, 0.001);

%!test
%! p = 0.1:0.1:0.9;
%! k = 0.3;
%! sigma = 1;
%! mu = 0;
%! x = gevinv (p, k, sigma, mu);
%! c = gevcdf(x, k, sigma, mu);
%! assert (c, p, 0.001);

%!test #small-k series must agree with the exact expression
%! k = 1e-6;
%! p = 0.9;
%! assert (gevinv (p, k, 1, 0), expm1 (-k * log (-log (p))) / k, 1e-10);

## Licence header of the next archive member (gevlike.m), which began on this
## line of the dump:
## Copyright (C) 2012 Nir Krakauer
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.
## -*- texinfo -*- ## @deftypefn {Function File} {@var{nlogL}, @var{Grad}, @var{ACOV} =} gevlike (@var{params}, @var{data}) ## Compute the negative log-likelihood of data under the generalized extreme value (GEV) distribution with given parameter values. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{params} is the 3-parameter vector [@var{k}, @var{sigma}, @var{mu}], where @var{k} is the shape parameter of the GEV distribution, @var{sigma} is the scale parameter of the GEV distribution, and @var{mu} is the location parameter of the GEV distribution. ## @item ## @var{data} is the vector of given values. ## ## @end itemize ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{nlogL} is the negative log-likelihood. ## @item ## @var{Grad} is the 3 by 1 gradient vector (first derivative of the negative log likelihood with respect to the parameter values) ## @item ## @var{ACOV} is the 3 by 3 Fisher information matrix (second derivative of the negative log likelihood with respect to the parameter values) ## ## @end itemize ## ## @subheading Examples ## ## @example ## @group ## x = -5:-1; ## k = -0.2; ## sigma = 0.3; ## mu = 0.5; ## [L, ~, C] = gevlike ([k sigma mu], x); ## @end group ## @end example ## ## @subheading References ## ## @enumerate ## @item ## Rolf-Dieter Reiss and Michael Thomas. @cite{Statistical Analysis of Extreme Values with Applications to Insurance, Finance, Hydrology and Other Fields}. Chapter 1, pages 16-17, Springer, 2007. 
## ## @end enumerate ## @seealso{gevcdf, gevfit, gevinv, gevpdf, gevrnd, gevstat} ## @end deftypefn ## Author: Nir Krakauer ## Description: Negative log-likelihood for the generalized extreme value distribution function [nlogL, Grad, ACOV] = gevlike (params, data) # Check arguments if (nargin != 2) print_usage; endif k = params(1); sigma = params(2); mu = params(3); #calculate negative log likelihood [nll, k_terms] = gevnll (data, k, sigma, mu); nlogL = sum(nll(:)); #optionally calculate the first and second derivatives of the negative log likelihood with respect to parameters if nargout > 1 [Grad, kk_terms] = gevgrad (data, k, sigma, mu, k_terms); if nargout > 2 ACOV = gevfim (data, k, sigma, mu, k_terms, kk_terms); endif endif endfunction function [nlogL, k_terms] = gevnll (x, k, sigma, mu) #internal function to calculate negative log likelihood for gevlike #no input checking done k_terms = []; a = (x - mu) ./ sigma; if all(k == 0) nlogL = exp(-a) + a + log(sigma); else aa = k .* a; if min(abs(aa)) < 1E-3 && max(abs(aa)) < 0.5 #use a series expansion to find the log likelihood more accurately when k is small k_terms = 1; sgn = 1; i = 0; while 1 sgn = -sgn; i++; newterm = (sgn / (i + 1)) * (aa .^ i); k_terms = k_terms + newterm; if max(abs(newterm)) <= eps break endif endwhile nlogL = exp(-a .* k_terms) + a .* (k + 1) .* k_terms + log(sigma); else b = 1 + aa; nlogL = b .^ (-1 ./ k) + (1 + 1 ./ k) .* log(b) + log(sigma); nlogL(b <= 0) = Inf; endif endif endfunction function [G, kk_terms] = gevgrad (x, k, sigma, mu, k_terms) #calculate the gradient of the negative log likelihood of data x with respect to the parameters of the generalized extreme value distribution for gevlike #no input checking done kk_terms = []; G = ones(3, 1); if k == 0 ##use the expressions for first derivatives that are the limits as k --> 0 a = (x - mu) ./ sigma; f = exp(-a) - 1; #k #g = -(2 * x .* (mu .* (1 - f) - sigma .* f) + 2 .* sigma .* mu .* f + (x.^2 + mu.^2).*(f - 1)) ./ (2 * f .* 
function ACOV = gevfim (x, k, sigma, mu, k_terms, kk_terms)
## Internal helper for gevlike: fills a 3x3 symmetric matrix with the summed
## second derivatives with respect to the parameters, ordered (k, sigma, mu),
## i.e. index 1 = k, index 2 = sigma, index 3 = mu.
##
## Inputs:
##   x         data values (every expression is elementwise in x and the
##             results are summed over all elements)
##   k         shape parameter; the test "k == 0" selects the limiting formulas
##   sigma     scale parameter
##   mu        location parameter
##   k_terms   optional series term supplied by the caller; only read when
##             kk_terms is also supplied
##   kk_terms  optional series term supplied by the caller; when it is given
##             and non-empty the series-expansion formulas are used instead of
##             the closed forms
##
## Output:
##   ACOV      3x3 matrix; off-diagonal entries are written in symmetric pairs
##
#internal function to calculate the Fisher information matrix for gevlike
#no input checking done

#find the various second derivatives (used Maxima to help find the expressions)

ACOV = ones(3);

if k == 0 ##use the expressions for second derivatives that are the limits as k --> 0
  #k, k
  a = (x - mu) ./ sigma; # standardized data
  f = exp(-a);
  #der = (x .* (24 * mu .^ 2 .* sigma .* (f - 1) + 24 * mu .* sigma .^ 2 .* f - 12 * mu .^ 3) + x .^ 3 .* (8 * sigma .* (f - 1) - 12*mu) + x .^ 2 .* (-12 * sigma .^ 2 .* f + 24 * mu .* sigma .* (1 - f) + 18 * mu .^ 2) - 12 * mu .^ 2 .* sigma .^ 2 .* f + 8 * mu .^ 3 .* sigma .* (1 - f) + 3 * (x .^ 4 + mu .^ 4)) ./ (12 .* f .* sigma .^ 4);
  der = (a .^ 2) .* (a .* (a/4 - 2/3) .* f + 2/3 * a - 1);
  ACOV(1, 1) = sum(der(:));

  #sigma, sigma
  der = (sigma .^ -2) .* (a .* ((a - 2) .* f + 2) - 1);
  ACOV(2, 2) = sum(der(:));

  #mu, mu
  der = (sigma .^ -2) .* f;
  ACOV(3, 3) = sum(der(:));

  #k, sigma
  #der = (x .^2 .* (2*sigma .* (f - 1) - 3*mu) + x .* (-2 * sigma .^ 2 .* f + 4 * mu .* sigma .* (1 - f) + 3 .* mu .^ 2) + 2 * mu .^ 2 .* sigma .* (f - 1) + 2 * mu * sigma .^ 2 * f + x .^ 3 - mu .^ 3) ./ (2 .* f .* sigma .^ 4);
  der = (-a ./ sigma) .* (a .* (1 - a/2) .* f - a + 1);
  ACOV(1, 2) = ACOV(2, 1) = sum(der(:));

  #k, mu
  #der = (x .* (2*sigma .* (f - 1) - 2*mu) - 2 * f .* sigma .^ 2 + 2 .* mu .* sigma .* (1 - f) + x .^ 2 + mu .^ 2)./ (2 .* f .* sigma .^ 3);
  der = (-1 ./ sigma) .* (a .* (1 - a/2) .* f - a + 1);
  ACOV(1, 3) = ACOV(3, 1) = sum(der(:));

  #sigma, mu
  der = (1 + (a - 1) .* f) ./ (sigma .^ 2);
  ACOV(2, 3) = ACOV(3, 2) = sum(der(:));

  return
endif

#general case

# z <= 0 marks data points for which every derivative term is zeroed below
z = 1 + k .* (x - mu) ./ sigma;

#k, k
a = (x - mu) ./ sigma;
b = k .* a + 1; # same quantity as z, written in terms of a
c = log(b);
d = 1 ./ k + 1;
if nargin > 5 && ~isempty(kk_terms)
  #use a series expansion to find the derivatives more accurately when k is small
  aa = k .* a;
  f = exp(-a .* k_terms);
  # accumulate an alternating power series in aa until the largest new term
  # is no bigger than eps
  kkk_terms = 2/3; sgn = 1; i = 0;
  while 1
    sgn = -sgn; i++;
    newterm = (sgn * (i + 1) * (i + 2) / (i + 3)) * (aa .^ i);
    kkk_terms = kkk_terms + newterm;
    if max(abs(newterm)) <= eps
      break
    endif
  endwhile
  der = (a .^ 2) .* (a .* (a .* kk_terms .^ 2 - kkk_terms) .* f + a .* (1 + k) .* kkk_terms - 2 * kk_terms);
else
  der = ((((c ./ k.^2) - (a ./ (k .* b))) .^ 2) ./ (b .^ (1 ./ k))) + ...
  ((-2*c ./ k.^3) + (2*a ./ (k.^2 .* b)) + ((a ./ b) .^ 2 ./ k)) ./ (b .^ (1 ./ k)) + ...
  2*c ./ k.^3 - ...
  (2*a ./ (k.^2 .* b)) - (d .* (a ./ b) .^ 2);
endif
der(z <= 0) = 0; %no probability mass in this region
ACOV(1, 1) = sum(der(:));

#sigma, sigma
# NOTE: the series branches below reuse f and kkk_terms computed in the
# (k, k) series branch above; all branches test the same condition.
if nargin > 5 && ~isempty(kk_terms)
  #use a series expansion to find the derivatives more accurately when k is small
  der = ((-2*a .* k_terms + 4 * a .^ 2 .* k .* kk_terms - a .^ 3 .* (k .^ 2) .* kkk_terms) .* (f - k - 1) + f .* ((a .* (k_terms - a .* k .* kk_terms)) .^ 2) - 1) ./ (sigma .^ 2);
else
  der = (sigma .^ -2) .* (...
    -2*a .* b .^ (-d) + ...
    d .* k .* a .^ 2 .* (b .^ (-d-1)) + ...
    2 .* d .* k .* a ./ b - ...
    d .* (k .* a ./ b) .^ 2 - 1);
end
der(z <= 0) = 0; %no probability mass in this region
ACOV(2, 2) = sum(der(:));

#mu, mu
if nargin > 5 && ~isempty(kk_terms)
  #use a series expansion to find the derivatives involving k more accurately when k is small
  der = (f .* (a .* k .* kk_terms - k_terms) .^ 2 - a .* k .^ 2 .* kkk_terms .* (f - k - 1)) ./ (sigma .^ 2);
else
  der = (d .* (sigma .^ -2)) .* (...
  k .* (b .^ (-d-1)) - ...
  (k ./ b) .^ 2);
endif
der(z <= 0) = 0; %no probability mass in this region
ACOV(3, 3) = sum(der(:));

#k, mu
if nargin > 5 && ~isempty(kk_terms)
  #use a series expansion to find the derivatives involving k more accurately when k is small
  der = 2 * a .* kk_terms .* (f - 1 - k) - a .^ 2 .* k_terms .* kk_terms .* f + k_terms; #k, a second derivative
  der = -der ./ sigma;
else
  der = ( (b .^ (-d)) .* (c ./ k - a ./ b) ./ k - ...
  a .* (b .^ (-d-1)) + ...
  ((1 ./ k) - d) ./ b + a .* k .* d ./ (b .^ 2)) ./ sigma;
endif
der(z <= 0) = 0; %no probability mass in this region
ACOV(1, 3) = ACOV(3, 1) = sum(der(:));

#k, sigma
# reuses the (k, mu) term computed just above, scaled elementwise by a
der = a .* der;
der(z <= 0) = 0; %no probability mass in this region
ACOV(1, 2) = ACOV(2, 1) = sum(der(:));

#sigma, mu
if nargin > 5 && ~isempty(kk_terms)
  #use a series expansion to find the derivatives involving k more accurately when k is small
  der = ((-k_terms + 3 * a .* k .* kk_terms - (a .* k) .^ 2 .* kkk_terms) .* (f - k - 1) + a .* (k_terms - a .* k .* kk_terms) .^ 2 .* f) ./ (sigma .^ 2);
else
  der = ( -(b .^ (-d)) + ...
  a .* k .* d .* (b .^ (-d-1)) + ...
  (d .* k ./ b) - a .* (k./b).^2 .* d) ./ (sigma .^ 2);
end
der(z <= 0) = 0; %no probability mass in this region
ACOV(2, 3) = ACOV(3, 2) = sum(der(:));

endfunction
assert (C, expected_C, -0.001); statistics-1.3.0/inst/gevpdf.m0000755000000000000000000000731412776476211014466 0ustar 00000000000000## Copyright (C) 2012 Nir Krakauer ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{y} =} gevpdf (@var{x}, @var{k}, @var{sigma}, @var{mu}) ## Compute the probability density function of the generalized extreme value (GEV) distribution. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{x} is the support. ## ## @item ## @var{k} is the shape parameter of the GEV distribution. (Also denoted gamma or xi.) ## @item ## @var{sigma} is the scale parameter of the GEV distribution. The elements ## of @var{sigma} must be positive. ## @item ## @var{mu} is the location parameter of the GEV distribution. ## @end itemize ## The inputs must be of common size, or some of them must be scalar. ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{y} is the probability density of the GEV distribution at each ## element of @var{x} and corresponding parameter values. ## @end itemize ## ## @subheading Examples ## ## @example ## @group ## x = 0:0.5:2.5; ## sigma = 1:6; ## k = 1; ## mu = 0; ## y = gevpdf (x, k, sigma, mu) ## @end group ## ## @group ## y = gevpdf (x, k, 0.5, mu) ## @end group ## @end example ## ## @subheading References ## ## @enumerate ## @item ## Rolf-Dieter Reiss and Michael Thomas. 
function y = gevpdf (x, k, sigma, mu)
  ## Probability density of the generalized extreme value distribution.
  ##
  ##   x      points at which the density is evaluated
  ##   k      shape parameter
  ##   sigma  scale parameter
  ##   mu     location parameter
  ##
  ## All inputs must be non-empty and of common size (or scalar).  Returns Y,
  ## the density for each element; entries with 1 + k*(x-mu)/sigma <= 0 are 0.

  ## Check arguments
  if (nargin != 4)
    print_usage ();
  endif

  if (isempty (x) || isempty (k) || isempty (sigma) || isempty (mu) ...
      || ~ismatrix (x) || ~ismatrix (k) || ~ismatrix (sigma) || ~ismatrix (mu))
    error ("gevpdf: inputs must be numeric matrices");
  endif

  [retval, x, k, sigma, mu] = common_size (x, k, sigma, mu);
  if (retval > 0)
    error ("gevpdf: inputs must be of common size or scalars");
  endif

  z = 1 + k .* (x - mu) ./ sigma;

  ## General formula; entries outside the support are overwritten with 0.
  y = exp (-(z .^ (-1 ./ k))) .* (z .^ (-1 - 1 ./ k)) ./ sigma;
  y(z <= 0) = 0;

  ## Use the limiting (k -> 0) formula where k is very close to zero.
  ## The mask is flattened before any(): on a matrix, any() works column by
  ## column and "if" on the resulting vector is true only when EVERY column
  ## has a hit, which used to skip this correction for matrix inputs.
  inds = (abs (k) < (eps^0.7));
  if (any (inds(:)))
    z = (mu(inds) - x(inds)) ./ sigma(inds);
    y(inds) = exp (z - exp (z)) ./ sigma(inds);
  endif

endfunction
assert (y, expected_y, 0.001); statistics-1.3.0/inst/gevrnd.m0000755000000000000000000001021512776476211014472 0ustar 00000000000000## Copyright (C) 2012 Nir Krakauer ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## Octave is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Octave; see the file COPYING. If not, see ## . ## -*- texinfo -*- ## @deftypefn {Function File} {} gevrnd (@var{k}, @var{sigma}, @var{mu}) ## @deftypefnx {Function File} {} gevrnd (@var{k}, @var{sigma}, @var{mu}, @var{r}) ## @deftypefnx {Function File} {} gevrnd (@var{k}, @var{sigma}, @var{mu}, @var{r}, @var{c}, @dots{}) ## @deftypefnx {Function File} {} gevrnd (@var{k}, @var{sigma}, @var{mu}, [@var{sz}]) ## Return a matrix of random samples from the generalized extreme value (GEV) distribution with parameters ## @var{k}, @var{sigma}, @var{mu}. ## ## When called with a single size argument, returns a square matrix with ## the dimension specified. When called with more than one scalar argument the ## first two arguments are taken as the number of rows and columns and any ## further arguments specify additional matrix dimensions. The size may also ## be specified with a vector @var{sz} of dimensions. ## ## If no size arguments are given, then the result matrix is the common size of ## the input parameters. 
function rnd = gevrnd (k, sigma, mu, varargin)
  ## Random samples from the generalized extreme value distribution.
  ##
  ##   k, sigma, mu  shape, scale and location; scalars or arrays of a
  ##                 common size.  Every element of sigma must be positive.
  ##   varargin      optional output size: one scalar N (N-by-N result),
  ##                 one row vector of dimensions, or several scalar
  ##                 dimensions.
  ##
  ## Samples are produced by passing uniform random numbers through gevinv.

  if (nargin < 3)
    print_usage ();
  endif

  ## Flatten before any(): on a matrix, any() reduces column by column and
  ## "if" on the resulting vector is true only when every column contains a
  ## bad value, so a single non-positive entry could slip through.
  if (any (sigma(:) <= 0))
    error ("gevrnd: sigma must be positive");
  endif

  if (!isscalar (k) || !isscalar (sigma) || !isscalar (mu))
    [retval, k, sigma, mu] = common_size (k, sigma, mu);
    if (retval > 0)
      error ("gevrnd: k, sigma, mu must be of common size or scalars");
    endif
  endif

  if (iscomplex (k) || iscomplex (sigma) || iscomplex (mu))
    error ("gevrnd: k, sigma, mu must not be complex");
  endif

  ## Work out the requested output size.
  if (nargin == 3)
    sz = size (k);
  elseif (nargin == 4)
    if (isscalar (varargin{1}) && varargin{1} >= 0)
      sz = [varargin{1}, varargin{1}];
    elseif (isrow (varargin{1}) && all (varargin{1} >= 0))
      sz = varargin{1};
    else
      error ("gevrnd: dimension vector must be row vector of non-negative integers");
    endif
  elseif (nargin > 4)
    if (any (cellfun (@(x) (!isscalar (x) || x < 0), varargin)))
      error ("gevrnd: dimensions must be non-negative integers");
    endif
    sz = [varargin{:}];
  endif

  ## After common_size, k is non-scalar whenever any parameter is.
  if (!isscalar (k) && !isequal (size (k), sz))
    error ("gevrnd: k, sigma, mu must be scalar or of size SZ");
  endif

  if (isa (k, "single") || isa (sigma, "single") || isa (mu, "single"))
    cls = "single";
  else
    cls = "double";
  endif

  rnd = gevinv (rand (sz), k, sigma, mu);

  if (strcmp (cls, "single"))
    rnd = single (rnd);
  endif

endfunction
gevrnd (i, 2, 1) %!error gevrnd (2, i, 1) %!error gevrnd (2, 0, 1) %!error gevrnd (1,2, 1, -1) %!error gevrnd (1,2, 1, ones(2)) %!error gevrnd (1,2, 1, [2 -1 2]) %!error gevrnd (1,2, 1, 1, ones(2)) %!error gevrnd (1,2, 1, 1, -1) %!error gevrnd (ones(2,2), 2, 1, 3) %!error gevrnd (ones(2,2), 2, 1, [3, 2]) %!error gevrnd (ones(2,2), 2, 1, 2, 3) statistics-1.3.0/inst/gevstat.m0000755000000000000000000000570312776476211014670 0ustar 00000000000000## Copyright (C) 2012 Nir Krakauer ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{m}, @var{v}] =} gevstat (@var{k}, @var{sigma}, @var{mu}) ## Compute the mean and variance of the generalized extreme value (GEV) distribution. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{k} is the shape parameter of the GEV distribution. (Also denoted gamma or xi.) ## @item ## @var{sigma} is the scale parameter of the GEV distribution. The elements ## of @var{sigma} must be positive. ## @item ## @var{mu} is the location parameter of the GEV distribution. ## @end itemize ## The inputs must be of common size, or some of them must be scalar. 
function [m, v] = gevstat (k, sigma, mu)
  ## Mean M and variance V of the generalized extreme value distribution
  ## with shape K, scale SIGMA and location MU (common size or scalar).
  ## The mean is Inf for k >= 1 and the variance is Inf for k >= 0.5.

  ## Argument checks
  if (nargin < 3)
    print_usage ();
  endif

  params = {k, sigma, mu};
  if (any (cellfun (@isempty, params)) || ! all (cellfun (@ismatrix, params)))
    error ("gevstat: inputs must be numeric matrices");
  endif

  [status, k, sigma, mu] = common_size (k, sigma, mu);
  if (status > 0)
    error ("gevstat: inputs must be of common size or scalars");
  endif

  euler_gamma = 0.57721566490153286; # Euler-Mascheroni constant

  ## Start both outputs as copies of k; each mask below overwrites its part.
  m = k;
  v = k;

  at_zero = (k == 0);
  off_zero = (k ~= 0);

  ## Mean
  m(k >= 1) = Inf;
  m(at_zero) = mu(at_zero) + euler_gamma * sigma(at_zero);
  sel = (k < 1) & off_zero;
  m(sel) = mu(sel) + sigma(sel) .* (gamma (1 - k(sel)) - 1) ./ k(sel);

  ## Variance
  v(k >= 0.5) = Inf;
  v(at_zero) = (pi^2 / 6) * sigma(at_zero) .^ 2;
  sel = (k < 0.5) & off_zero;
  g1 = gamma (1 - k(sel));
  g2 = gamma (1 - 2*k(sel));
  v(sel) = (g2 - g1 .^ 2) .* (sigma(sel) ./ k(sel)) .^ 2;

endfunction
function cdf = gpcdf (x, location, scale, shape)
  ## Cumulative distribution function of the generalized Pareto distribution.
  ##
  ##   x         evaluation points
  ##   location  location parameter (finite)
  ##   scale     scale parameter (positive)
  ##   shape     shape parameter (finite)
  ##
  ## Inputs must be scalars or of common size.  The result is single if any
  ## input is single.  NaN is returned where x is NaN or a parameter is
  ## invalid; 0 below the support; 1 at x = Inf and, for negative shape,
  ## above the upper end point location - scale/shape.

  if (nargin != 4)
    print_usage ();
  endif

  ## Expanding scalars always succeeds, so a single vectorised code path
  ## can serve both scalar and array parameters.
  [retval, x, location, scale, shape] = ...
      common_size (x, location, scale, shape);
  if (retval > 0)
    error ("gpcdf: X, LOCATION, SCALE and SHAPE must be of common size or scalars");
  endif

  if (iscomplex (x) || iscomplex (location) || iscomplex (scale) ...
      || iscomplex (shape))
    error ("gpcdf: X, LOCATION, SCALE and SHAPE must not be complex");
  endif

  if (isa (x, "single") || isa (location, "single") || isa (scale, "single") ...
      || isa (shape, "single"))
    cdf = zeros (size (x), "single");
  else
    cdf = zeros (size (x));
  endif

  okpar = (-Inf < location) & (location < Inf) & (scale > 0) ...
          & (-Inf < shape) & (shape < Inf);

  cdf(isnan (x) | ! okpar) = NaN;
  cdf((x == Inf) & okpar) = 1;

  ## Finite x with usable parameters.
  k = (-Inf < x) & (x < Inf) & okpar & (scale < Inf);

  z = (x - location) ./ scale;

  ## Masks are flattened before any(): any() on a matrix reduces column by
  ## column, and "if" on that vector is true only when every column has a
  ## hit, which used to skip valid entries of matrix inputs.

  ## shape == 0: exponential distribution.
  j = k & (shape == 0) & (z >= 0);
  if (any (j(:)))
    cdf(j) = 1 - exp (-z(j));
  endif

  ## shape != 0 inside the support (bounded above when shape < 0).
  j = k & (z >= 0) & ((shape > 0) | ((shape < 0) & (z <= -1 ./ shape)));
  if (any (j(:)))
    cdf(j) = 1 - (shape(j) .* z(j) + 1) .^ (-1 ./ shape(j));
  endif

  ## shape < 0 and x beyond the upper end point: all mass lies below x.
  j = k & (shape < 0) & (z > -1 ./ shape);
  if (any (j(:)))
    cdf(j) = 1;
  endif

endfunction
seps = eps('single')*5; %!assert (gpcdf (x, zeros (1,6), ones (1,6), zeros (1,6)), y1, eps) %!assert (gpcdf (x, zeros (1,6), 1, 0), y1, eps) %!assert (gpcdf (x, 0, ones (1,6), 0), y1, eps) %!assert (gpcdf (x, 0, 1, zeros (1,6)), y1, eps) %!assert (gpcdf (x, 0, 1, 0), y1, eps) %!assert (gpcdf (x, [0, 0, 0, NaN, 0, 0], 1, 0), [y1(1:3), NaN, y1(5:6)], eps) %!assert (gpcdf (x, 0, [1, 1, 1, NaN, 1, 1], 0), [y1(1:3), NaN, y1(5:6)], eps) %!assert (gpcdf (x, 0, 1, [0, 0, 0, NaN, 0, 0]), [y1(1:3), NaN, y1(5:6)], eps) %!assert (gpcdf ([x(1:3), NaN, x(5:6)], 0, 1, 0), [y1(1:3), NaN, y1(5:6)], eps) %!assert (gpcdf (x, zeros (1,6), ones (1,6), ones (1,6)), y2, eps) %!assert (gpcdf (x, zeros (1,6), 1, 1), y2, eps) %!assert (gpcdf (x, 0, ones (1,6), 1), y2, eps) %!assert (gpcdf (x, 0, 1, ones (1,6)), y2, eps) %!assert (gpcdf (x, 0, 1, 1), y2, eps) %!assert (gpcdf (x, [0, 0, 0, NaN, 0, 0], 1, 1), [y2(1:3), NaN, y2(5:6)], eps) %!assert (gpcdf (x, 0, [1, 1, 1, NaN, 1, 1], 1), [y2(1:3), NaN, y2(5:6)], eps) %!assert (gpcdf (x, 0, 1, [1, 1, 1, NaN, 1, 1]), [y2(1:3), NaN, y2(5:6)], eps) %!assert (gpcdf ([x(1:3), NaN, x(5:6)], 0, 1, 1), [y2(1:3), NaN, y2(5:6)], eps) %!assert (gpcdf (x, zeros (1,6), ones (1,6), -ones (1,6)), y3, eps) %!assert (gpcdf (x, zeros (1,6), 1, -1), y3, eps) %!assert (gpcdf (x, 0, ones (1,6), -1), y3, eps) %!assert (gpcdf (x, 0, 1, -ones (1,6)), y3, eps) %!assert (gpcdf (x, 0, 1, -1), y3, eps) %!assert (gpcdf (x, [0, 0, 0, NaN, 0, 0], 1, -1), [y1(1:3), NaN, y3(5:6)], eps) %!assert (gpcdf (x, 0, [1, 1, 1, NaN, 1, 1], -1), [y1(1:3), NaN, y3(5:6)], eps) %!assert (gpcdf (x, 0, 1, [-1, -1, -1, NaN, -1, -1]), [y1(1:3), NaN, y3(5:6)], eps) %!assert (gpcdf ([x(1:3), NaN, x(5:6)], 0, 1, -1), [y1(1:3), NaN, y3(5:6)], eps) ## Test class of input preserved %!assert (gpcdf (single ([x, NaN]), 0, 1, 0), single ([y1, NaN]), eps('single')) %!assert (gpcdf ([x, NaN], single (0), 1, 0), single ([y1, NaN]), eps('single')) %!assert (gpcdf ([x, NaN], 0, single (1), 0), single ([y1, 
NaN]), eps('single')) %!assert (gpcdf ([x, NaN], 0, 1, single (0)), single ([y1, NaN]), eps('single')) %!assert (gpcdf (single ([x, NaN]), 0, 1, 1), single ([y2, NaN]), eps('single')) %!assert (gpcdf ([x, NaN], single (0), 1, 1), single ([y2, NaN]), eps('single')) %!assert (gpcdf ([x, NaN], 0, single (1), 1), single ([y2, NaN]), eps('single')) %!assert (gpcdf ([x, NaN], 0, 1, single (1)), single ([y2, NaN]), eps('single')) %!assert (gpcdf (single ([x, NaN]), 0, 1, -1), single ([y3, NaN]), eps('single')) %!assert (gpcdf ([x, NaN], single (0), 1, -1), single ([y3, NaN]), eps('single')) %!assert (gpcdf ([x, NaN], 0, single (1), -1), single ([y3, NaN]), eps('single')) %!assert (gpcdf ([x, NaN], 0, 1, single (-1)), single ([y3, NaN]), eps('single')) ## Test input validation %!error gpcdf () %!error gpcdf (1) %!error gpcdf (1,2) %!error gpcdf (1,2,3) %!error gpcdf (1,2,3,4,5) %!error gpcdf (ones (3), ones (2), ones (2), ones (2)) %!error gpcdf (ones (2), ones (3), ones (2), ones (2)) %!error gpcdf (ones (2), ones (2), ones (3), ones (2)) %!error gpcdf (ones (2), ones (2), ones (2), ones (3)) %!error gpcdf (i, 2, 2, 2) %!error gpcdf (2, i, 2, 2) %!error gpcdf (2, 2, i, 2) %!error gpcdf (2, 2, 2, i) statistics-1.3.0/inst/gpinv.m0000644000000000000000000001406412776476211014333 0ustar 00000000000000## Copyright (C) 2016 Dag Lyberg ## Copyright (C) 1997-2015 Kurt Hornik ## ## This file is part of Octave. ## ## Octave is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or (at ## your option) any later version. ## ## Octave is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
function inv = gpinv (x, location, scale, shape)
  ## Quantile function (inverse CDF) of the generalized Pareto distribution.
  ##
  ##   x         probabilities in [0, 1]
  ##   location  location parameter (finite)
  ##   scale     scale parameter (positive and finite)
  ##   shape     shape parameter (finite)
  ##
  ## Inputs must be scalars or of common size.  The result is single if any
  ## input is single.  NaN is returned where x lies outside [0, 1] (or is
  ## NaN) or a parameter is invalid.

  if (nargin != 4)
    print_usage ();
  endif

  ## Expanding scalars always succeeds, so one vectorised code path serves
  ## both scalar and array parameters.
  [retval, x, location, scale, shape] = ...
      common_size (x, location, scale, shape);
  if (retval > 0)
    error ("gpinv: X, LOCATION, SCALE and SHAPE must be of common size or scalars");
  endif

  if (iscomplex (x) || iscomplex (location) ...
      || iscomplex (scale) || iscomplex (shape))
    error ("gpinv: X, LOCATION, SCALE and SHAPE must not be complex");
  endif

  if (isa (x, "single") || isa (location, "single") ...
      || isa (scale, "single") || isa (shape, "single"))
    inv = zeros (size (x), "single");
  else
    inv = zeros (size (x));
  endif

  ## Entries with a valid probability and valid parameters.  NaN fails every
  ## comparison, so NaN inputs fall into the complement.
  k = (0 <= x) & (x <= 1) & (-Inf < location) & (location < Inf) ...
      & (scale > 0) & (scale < Inf) & (-Inf < shape) & (shape < Inf);

  inv(! k) = NaN;

  ## Masks are flattened before any(): any() on a matrix reduces column by
  ## column, and "if" on that vector is true only when every column has a
  ## hit, which used to skip valid entries of matrix inputs.

  ## shape == 0: exponential quantile.
  j = k & (shape == 0);
  if (any (j(:)))
    inv(j) = -log (1 - x(j));
    inv(j) = scale(j) .* inv(j) + location(j);
  endif

  ## shape != 0: the same closed form holds for either sign.
  j = k & (shape != 0);
  if (any (j(:)))
    inv(j) = (1 - x(j)) .^ (-shape(j)) - 1;
    inv(j) = (scale(j) ./ shape(j)) .* inv(j) + location(j);
  endif

endfunction
%!assert (gpinv (x, zeros (1,5), ones (1,5), -ones (1,5)), y3) %!assert (gpinv (x, zeros (1,5), 1, -1), y3) %!assert (gpinv (x, 0, ones (1,5), -1), y3) %!assert (gpinv (x, 0, 1, -ones (1,5)), y3) %!assert (gpinv (x, 0, 1, -1), y3) %!assert (gpinv (x, [0, 0, NaN, 0, 0], 1, -1), [y3(1:2), NaN, y3(4:5)]) %!assert (gpinv (x, 0, [1, 1, NaN, 1, 1], -1), [y3(1:2), NaN, y3(4:5)]) %!assert (gpinv (x, 0, 1, -[1, 1, NaN, 1, 1]), [y3(1:2), NaN, y3(4:5)]) %!assert (gpinv ([x(1:2), NaN, x(4:5)], 0, 1, -1), [y3(1:2), NaN, y3(4:5)]) ## Test class of input preserved %!assert (gpinv (single ([x, NaN]), 0, 1, 0), single ([y1, NaN])) %!assert (gpinv ([x, NaN], single (0), 1, 0), single ([y1, NaN])) %!assert (gpinv ([x, NaN], 0, single (1), 0), single ([y1, NaN])) %!assert (gpinv ([x, NaN], 0, 1, single (0)), single ([y1, NaN])) %!assert (gpinv (single ([x, NaN]), 0, 1, 1), single ([y2, NaN])) %!assert (gpinv ([x, NaN], single (0), 1, 1), single ([y2, NaN])) %!assert (gpinv ([x, NaN], 0, single (1), 1), single ([y2, NaN])) %!assert (gpinv ([x, NaN], 0, 1, single (1)), single ([y2, NaN])) %!assert (gpinv (single ([x, NaN]), 0, 1, -1), single ([y3, NaN])) %!assert (gpinv ([x, NaN], single (0), 1, -1), single ([y3, NaN])) %!assert (gpinv ([x, NaN], 0, single (1), -1), single ([y3, NaN])) %!assert (gpinv ([x, NaN], 0, 1, single (-1)), single ([y3, NaN])) ## Test input validation %!error gpinv () %!error gpinv (1) %!error gpinv (1,2) %!error gpinv (1,2,3) %!error gpinv (1,2,3,4,5) %!error gpinv (ones (3), ones (2), ones (2), ones (2)) %!error gpinv (ones (2), ones (3), ones (2), ones (2)) %!error gpinv (ones (2), ones (2), ones (3), ones (2)) %!error gpinv (ones (2), ones (2), ones (2), ones (3)) %!error gpinv (i, 2, 2, 2) %!error gpinv (2, i, 2, 2) %!error gpinv (2, 2, i, 2) %!error gpinv (2, 2, 2, i) statistics-1.3.0/inst/gppdf.m0000644000000000000000000001415712776476211014313 0ustar 00000000000000## Copyright (C) 2016 Dag Lyberg ## Copyright (C) 1997-2015 Kurt Hornik ## ## This file is 
function pdf = gppdf (x, location, scale, shape)
  ## Probability density function of the generalized Pareto distribution.
  ##
  ##   x         evaluation points
  ##   location  location parameter (finite)
  ##   scale     scale parameter (positive and finite)
  ##   shape     shape parameter (finite)
  ##
  ## Inputs must be scalars or of common size.  The result is single if any
  ## input is single.  NaN is returned where x is NaN or a parameter is
  ## invalid; 0 outside the support.

  if (nargin != 4)
    print_usage ();
  endif

  ## Expanding scalars always succeeds, so one vectorised code path serves
  ## both scalar and array parameters.
  [retval, x, location, scale, shape] = ...
      common_size (x, location, scale, shape);
  if (retval > 0)
    error ("gppdf: X, LOCATION, SCALE and SHAPE must be of common size or scalars");
  endif

  if (iscomplex (x) || iscomplex (location) || iscomplex (scale) ...
      || iscomplex (shape))
    error ("gppdf: X, LOCATION, SCALE and SHAPE must not be complex");
  endif

  if (isa (x, "single") || isa (location, "single") || isa (scale, "single") ...
      || isa (shape, "single"))
    pdf = zeros (size (x), "single");
  else
    pdf = zeros (size (x));
  endif

  okpar = (-Inf < location) & (location < Inf) & (scale > 0) & (scale < Inf) ...
          & (-Inf < shape) & (shape < Inf);

  pdf(isnan (x) | ! okpar) = NaN;

  ## Finite x with valid parameters.
  k = (-Inf < x) & (x < Inf) & okpar;

  z = (x - location) ./ scale;

  ## Masks are flattened before any(): any() on a matrix reduces column by
  ## column, and "if" on that vector is true only when every column has a
  ## hit, which used to skip valid entries of matrix inputs.
  ##
  ## The density of the standardised variable z must be divided by scale to
  ## give the density of x; without that factor the result does not
  ## integrate to one unless scale == 1.

  ## shape == 0: exponential density.
  j = k & (shape == 0) & (z >= 0);
  if (any (j(:)))
    pdf(j) = exp (-z(j)) ./ scale(j);
  endif

  ## shape != 0 inside the support (bounded above when shape < 0).
  j = k & (z >= 0) & ((shape > 0) | ((shape < 0) & (z <= -1 ./ shape)));
  if (any (j(:)))
    pdf(j) = (shape(j) .* z(j) + 1) .^ (-(shape(j) + 1) ./ shape(j)) ./ scale(j);
  endif

endfunction
y3 = [0, 0, 1, 1, 1, 0]; %!assert (gppdf (x, zeros (1,6), ones (1,6), zeros (1,6)), y1, eps) %!assert (gppdf (x, zeros (1,6), 1, 0), y1, eps) %!assert (gppdf (x, 0, ones (1,6), 0), y1, eps) %!assert (gppdf (x, 0, 1, zeros (1,6)), y1, eps) %!assert (gppdf (x, 0, 1, 0), y1, eps) %!assert (gppdf (x, [0, 0, 0, NaN, 0, 0], 1, 0), [y1(1:3), NaN, y1(5:6)]) %!assert (gppdf (x, 0, [1, 1, 1, NaN, 1, 1], 0), [y1(1:3), NaN, y1(5:6)]) %!assert (gppdf (x, 0, 1, [0, 0, 0, NaN, 0, 0]), [y1(1:3), NaN, y1(5:6)]) %!assert (gppdf ([x(1:3), NaN, x(5:6)], 0, 1, 0), [y1(1:3), NaN, y1(5:6)]) %!assert (gppdf (x, zeros (1,6), ones (1,6), ones (1,6)), y2, eps) %!assert (gppdf (x, zeros (1,6), 1, 1), y2, eps) %!assert (gppdf (x, 0, ones (1,6), 1), y2, eps) %!assert (gppdf (x, 0, 1, ones (1,6)), y2, eps) %!assert (gppdf (x, 0, 1, 1), y2, eps) %!assert (gppdf (x, [0, 0, 0, NaN, 0, 0], 1, 1), [y2(1:3), NaN, y2(5:6)]) %!assert (gppdf (x, 0, [1, 1, 1, NaN, 1, 1], 1), [y2(1:3), NaN, y2(5:6)]) %!assert (gppdf (x, 0, 1, [1, 1, 1, NaN, 1, 1]), [y2(1:3), NaN, y2(5:6)]) %!assert (gppdf ([x(1:3), NaN, x(5:6)], 0, 1, 1), [y2(1:3), NaN, y2(5:6)]) %!assert (gppdf (x, zeros (1,6), ones (1,6), -ones (1,6)), y3, eps) %!assert (gppdf (x, zeros (1,6), 1, -1), y3, eps) %!assert (gppdf (x, 0, ones (1,6), -1), y3, eps) %!assert (gppdf (x, 0, 1, -ones (1,6)), y3, eps) %!assert (gppdf (x, 0, 1, -1), y3, eps) %!assert (gppdf (x, [0, 0, 0, NaN, 0, 0], 1, -1), [y3(1:3), NaN, y3(5:6)]) %!assert (gppdf (x, 0, [1, 1, 1, NaN, 1, 1], -1), [y3(1:3), NaN, y3(5:6)]) %!assert (gppdf (x, 0, 1, [-1, -1, -1, NaN, -1, -1]), [y3(1:3), NaN, y3(5:6)]) %!assert (gppdf ([x(1:3), NaN, x(5:6)], 0, 1, -1), [y3(1:3), NaN, y3(5:6)]) ## Test class of input preserved %!assert (gppdf (single ([x, NaN]), 0, 1, 0), single ([y1, NaN])) %!assert (gppdf ([x, NaN], single (0), 1, 0), single ([y1, NaN])) %!assert (gppdf ([x, NaN], 0, single (1), 0), single ([y1, NaN])) %!assert (gppdf ([x, NaN], 0, 1, single (0)), single ([y1, NaN])) %!assert (gppdf 
(single ([x, NaN]), 0, 1, 1), single ([y2, NaN])) %!assert (gppdf ([x, NaN], single (0), 1, 1), single ([y2, NaN])) %!assert (gppdf ([x, NaN], 0, single (1), 1), single ([y2, NaN])) %!assert (gppdf ([x, NaN], 0, 1, single (1)), single ([y2, NaN])) %!assert (gppdf (single ([x, NaN]), 0, 1, -1), single ([y3, NaN])) %!assert (gppdf ([x, NaN], single (0), 1, -1), single ([y3, NaN])) %!assert (gppdf ([x, NaN], 0, single (1), -1), single ([y3, NaN])) %!assert (gppdf ([x, NaN], 0, 1, single (-1)), single ([y3, NaN])) ## Test input validation %!error gppdf () %!error gppdf (1) %!error gppdf (1,2) %!error gppdf (1,2,3) %!error gppdf (1,2,3,4,5) %!error gppdf (1, ones (3), ones (2), ones (2)) %!error gppdf (1, ones (2), ones (3), ones (2)) %!error gppdf (1, ones (2), ones (2), ones (3)) %!error gppdf (i, 2, 2, 2) %!error gppdf (2, i, 2, 2) %!error gppdf (2, 2, i, 2) %!error gppdf (2, 2, 2, i) statistics-1.3.0/inst/gprnd.m0000644000000000000000000001464612776476211014330 0ustar 00000000000000## Copyright (C) 2016 Dag Lyberg ## Copyright (C) 1995-2015 Kurt Hornik ## ## This file is part of Octave. ## ## Octave is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or (at ## your option) any later version. ## ## Octave is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Octave; see the file COPYING. If not, see ## . 
## -*- texinfo -*-
## @deftypefn {} {} gprnd (@var{location}, @var{scale}, @var{shape})
## @deftypefnx {} {} gprnd (@var{location}, @var{scale}, @var{shape}, @var{r})
## @deftypefnx {} {} gprnd (@var{location}, @var{scale}, @var{shape}, @var{r}, @var{c}, @dots{})
## @deftypefnx {} {} gprnd (@var{location}, @var{scale}, @var{shape}, [@var{sz}])
## Return a matrix of random samples from the generalized Pareto distribution
## with parameters @var{location}, @var{scale} and @var{shape}.
##
## When called with a single size argument, return a square matrix with
## the dimension specified.  When called with more than one scalar argument the
## first two arguments are taken as the number of rows and columns and any
## further arguments specify additional matrix dimensions.  The size may also
## be specified with a vector of dimensions @var{sz}.
##
## If no size arguments are given then the result matrix is the common size of
## @var{location}, @var{scale} and @var{shape}.
##
## Elements for which @var{location} or @var{shape} is not finite, or for
## which @var{scale} is not strictly between 0 and Inf, are returned as NaN.
## The result is of class single if any parameter is single, double otherwise.
## @end deftypefn

## Author: Dag Lyberg
## Description: Random deviates from the generalized Pareto distribution

function rnd = gprnd (location, scale, shape, varargin)

  if (nargin < 3)
    print_usage ();
  endif

  ## Expand scalar parameters so that all three share one size.
  if (! isscalar (location) || ! isscalar (scale) || ! isscalar (shape))
    [retval, location, scale, shape] = common_size (location, scale, shape);
    if (retval > 0)
      error ("gprnd: LOCATION, SCALE and SHAPE must be of common size or scalars");
    endif
  endif

  if (iscomplex (location) || iscomplex (scale) || iscomplex (shape))
    error ("gprnd: LOCATION, SCALE and SHAPE must not be complex");
  endif

  ## Work out the requested output size.
  if (nargin == 3)
    sz = size (location);
  elseif (nargin == 4)
    if (isscalar (varargin{1}) && varargin{1} >= 0)
      sz = [varargin{1}, varargin{1}];
    elseif (isrow (varargin{1}) && all (varargin{1} >= 0))
      sz = varargin{1};
    else
      error ("gprnd: dimension vector must be row vector of non-negative integers");
    endif
  elseif (nargin > 4)
    if (any (cellfun (@(x) (! isscalar (x) || x < 0), varargin)))
      error ("gprnd: dimensions must be non-negative integers");
    endif
    sz = [varargin{:}];
  endif

  ## After common_size either all parameters are scalar or all have the
  ## same size, so checking LOCATION is sufficient.
  if (! isscalar (location) && ! isequal (size (location), sz))
    error ("gprnd: LOCATION, SCALE and SHAPE must be scalar or of size SZ");
  endif

  if (isa (location, "single") || isa (scale, "single") || isa (shape, "single"))
    cls = "single";
  else
    cls = "double";
  endif

  if (isscalar (location) && isscalar (scale) && isscalar (shape))
    if ((-Inf < location) && (location < Inf) && (0 < scale) && (scale < Inf) ...
        && (-Inf < shape) && (shape < Inf))
      rnd = rand (sz, cls);
      if (shape == 0)
        ## Limiting case: shifted and scaled exponential distribution.
        rnd = -log (1 - rnd);
        rnd = scale * rnd + location;
      else
        ## Inverse-CDF transform for a non-zero shape.
        rnd = (1 - rnd).^(-shape) - 1;
        rnd = (scale / shape) * rnd + location;
      endif
    else
      rnd = NaN (sz, cls);
    endif
  else
    rnd = NaN (sz, cls);

    ## Elements with valid parameters (same criteria as the scalar branch).
    k = (-Inf < location) & (location < Inf) & (scale > 0) & (scale < Inf) ...
        & (-Inf < shape) & (shape < Inf);
    rnd(k(:)) = rand (1, sum (k(:)), cls);

    ## Apply the formula matching each element's own shape value; a zero
    ## shape in one element must not affect how the others are transformed.
    kz = k & (shape == 0);
    rnd(kz) = -log (1 - rnd(kz));
    rnd(kz) = scale(kz) .* rnd(kz) + location(kz);

    kn = k & (shape != 0);
    rnd(kn) = (1 - rnd(kn)).^(-shape(kn)) - 1;
    rnd(kn) = (scale(kn) ./ shape(kn)) .* rnd(kn) + location(kn);
  endif

endfunction


%!assert (size (gprnd (0,1,0)), [1, 1])
%!assert (size (gprnd (zeros (2,1), 1,0)), [2, 1])
%!assert (size (gprnd (zeros (2,2), 1,0)), [2, 2])
%!assert (size (gprnd (0, ones (2,1), 0)), [2, 1])
%!assert (size (gprnd (0, ones (2,2), 0)), [2, 2])
%!assert (size (gprnd (0,1, zeros (2,1))), [2, 1])
%!assert (size (gprnd (0,1, zeros (2,2))), [2, 2])
%!assert (size (gprnd (0,1, 0, 3)), [3, 3])
%!assert (size (gprnd (0,1, 0, [4 1])), [4, 1])
%!assert (size (gprnd (0,1, 0, 4, 1)), [4, 1])
%!assert (size (gprnd (0,1,1)), [1, 1])
%!assert (size (gprnd (zeros (2,1), 1,1)), [2, 1])
%!assert (size (gprnd (zeros (2,2), 1,1)), [2, 2])
%!assert (size (gprnd (0, ones (2,1), 1)), [2, 1])
%!assert (size (gprnd (0, ones (2,2), 1)), [2, 2])
%!assert (size (gprnd (0,1, ones (2,1))), [2, 1])
%!assert (size
(gprnd (0,1, ones (2,2))), [2, 2]) %!assert (size (gprnd (0,1, 1, 3)), [3, 3]) %!assert (size (gprnd (0,1, 1, [4 1])), [4, 1]) %!assert (size (gprnd (0,1, 1, 4, 1)), [4, 1]) %!assert (size (gprnd (0,1,-1)), [1, 1]) %!assert (size (gprnd (zeros (2,1), -1,1)), [2, 1]) %!assert (size (gprnd (zeros (2,2), -1,1)), [2, 2]) %!assert (size (gprnd (0, ones (2,1), -1)), [2, 1]) %!assert (size (gprnd (0, ones (2,2), -1)), [2, 2]) %!assert (size (gprnd (0,1, -ones (2,1))), [2, 1]) %!assert (size (gprnd (0,1, -ones (2,2))), [2, 2]) %!assert (size (gprnd (0,1, -1, 3)), [3, 3]) %!assert (size (gprnd (0,1, -1, [4 1])), [4, 1]) %!assert (size (gprnd (0,1, -1, 4, 1)), [4, 1]) ## Test class of input preserved %!assert (class (gprnd (0,1,0)), "double") %!assert (class (gprnd (single (0),1,0)), "single") %!assert (class (gprnd (single ([0 0]),1,0)), "single") %!assert (class (gprnd (0,single (1),0)), "single") %!assert (class (gprnd (0,single ([1 1]),0)), "single") %!assert (class (gprnd (0,1,single (0))), "single") %!assert (class (gprnd (0,1,single ([0 0]))), "single") ## Test input validation %!error gprnd () %!error gprnd (1) %!error gprnd (1,2) %!error gprnd (zeros (3), ones (2), zeros (2)) %!error gprnd (zeros (2), ones (3), zeros (2)) %!error gprnd (zeros (2), ones (2), zeros (3)) %!error gprnd (i, 1, 0) %!error gprnd (0, i, 0) %!error gprnd (0, 1, i) %!error gprnd (0,1,0, -1) %!error gprnd (0,1,0, ones (2)) %!error gprnd (0,1,0, [2 -1 2]) %!error gprnd (zeros (2),1,0, 3) %!error gprnd (zeros (2),1,0, [3, 2]) %!error gprnd (zeros (2),1,0, 3, 2) statistics-1.3.0/inst/grp2idx.m0000755000000000000000000001130012776476211014560 0ustar 00000000000000## Copyright (C) 2015 Carnë Draug ## ## This program is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 3 of the ## License, or (at your option) any later version. 
##
## This program is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{g}, @var{gn}, @var{gl}] =} grp2idx (@var{s})
## Get index for group variables.
##
## For the grouping variable @var{s}, return in @var{g} the index of the
## group each element belongs to.  The groups themselves are returned in
## @var{gn} and @var{gl}: the former holds a string representation of each
## group while the latter holds the actual group values.
##
## @var{s} may be a vector, a cell array of strings, or a char matrix (one
## group label per row).  @var{g} is always a column vector.
##
## NaNs and empty strings in @var{s} appear as NaN in @var{g} and are
## not present in either @var{gn} or @var{gl}.
##
## @seealso{cellstr, num2str, unique}
## @end deftypefn

function [g, gn, gl] = grp2idx (s)

  if (nargin != 1)
    print_usage ();
  endif

  ## Char matrices are handled as cell arrays of strings; remember the
  ## original type so that GL can be converted back at the end.
  from_char = ischar (s);
  if (from_char)
    s = cellstr (s);
  elseif (! isvector (s))
    error ("grp2idx: S must be a vector, cell array of strings, or char matrix");
  endif

  ## FIXME once Octave core implements "sorted" and "stable" argument to
  ##       unique(), we can use the following snippet so that we are fully
  ##       Matlab compatible.
  #  set_order = "sorted";
  #  if (iscellstr (s))
  #    set_order = "stable";
  #  endif
  #  [gl, ~, g] = unique (s(:), set_order);
  [gl, ~, g] = unique (s(:));

  ## Missing values: NaN for numeric input, empty strings for cellstr input.
  if (! iscellstr (s))
    ## NaN sorts last in unique(), so dropping it from GL leaves the
    ## indices of every other group unchanged.
    g(isnan (s)) = NaN;
    gl(isnan (gl)) = [];
  else
    ## FIXME empty strings appear at the front because unique is sorting
    ##       them, so we only need to subtract one.  However, when fix the
    ##       order for strings (when core's unique has the stable option),
    ##       then we'll have to come up with something clever.
    blank = cellfun (@isempty, s);
    if (any (blank))
      g(blank) = NaN;
      g -= 1;
      gl(1) = [];
    endif
  endif

  ## Group names are only built when the caller asks for them.
  if (isargout (2))
    if (iscellstr (gl))
      gn = gl;
    else
      if (iscell (gl))
        mapper = @cellfun;
      else
        mapper = @arrayfun;
      endif
      gn = mapper (@num2str, gl, "UniformOutput", false);
    endif
  endif

  if (isargout (3) && from_char)
    gl = char (gl);
  endif

endfunction

## test boolean input and note that row or column vector makes no difference
%!test
%! in = [true false false true];
%! out = {[2; 1; 1; 2] {"0"; "1"} [false; true]};
%! assert (nthargout (1:3, @grp2idx, in), out)
%! assert (nthargout (1:3, @grp2idx, in), nthargout (1:3, @grp2idx, in'))

## test that groups are ordered in boolean
%!test
%! assert (nthargout (1:3, @grp2idx, [false true]),
%!         {[1; 2] {"0"; "1"} [false; true]});
%! assert (nthargout (1:3, @grp2idx, [true false]),
%!         {[2; 1] {"0"; "1"} [false; true]});

## test char matrix and cell array of strings
%!assert (nthargout (1:3, @grp2idx, ["oct"; "sci"; "oct"; "oct"; "sci"]),
%!        {[1; 2; 1; 1; 2] {"oct"; "sci"} ["oct"; "sci"]});
## and cell array of strings
%!assert (nthargout (1:3, @grp2idx, {"oct"; "sci"; "oct"; "oct"; "sci"}),
%!        {[1; 2; 1; 1; 2] {"oct"; "sci"} {"oct"; "sci"}});

## test numeric arrays
%!assert (nthargout (1:3, @grp2idx, [ 1 -3 -2 -3 -3 2 1 -1 3 -3]),
%!        {[4; 1; 2; 1; 1; 5; 4; 3; 6; 1] {"-3"; "-2"; "-1"; "1"; "2"; "3"} ...
%!         [-3; -2; -1; 1; 2; 3]});

## test for NaN and empty strings
%!assert (nthargout (1:3, @grp2idx, [2 2 3 NaN 2 3]),
%!        {[1; 1; 2; NaN; 1; 2] {"2"; "3"} [2; 3]})
%!assert (nthargout (1:3, @grp2idx, {"et" "sa" "sa" "" "et"}),
%!        {[1; 2; 2; NaN; 1] {"et"; "sa"} {"et"; "sa"}})

## FIXME this fails because unique() in core does not yet have set_order
##       option implemented.  See code for code to uncomment once it is
##       implemented in core.
## Test that order when handling strings is by order of appearance
%!assert (nthargout (1:3, @grp2idx, ["sci"; "oct"; "sci"; "oct"; "oct"]),
%!
%!        {[1; 2; 1; 2; 2] {"sci"; "oct"} ["sci"; "oct"]});
%!assert (nthargout (1:3, @grp2idx, {"sci"; "oct"; "sci"; "oct"; "oct"}),
%!        {[1; 2; 1; 2; 2] {"sci"; "oct"} {"sci"; "oct"}});
%!assert (nthargout (1:3, @grp2idx, {"sa" "et" "et" "" "sa"}),
%!        {[1; 2; 2; NaN; 1] {"sa"; "et"} {"sa"; "et"}})
statistics-1.3.0/inst/harmmean.m0000755000000000000000000000215112776476211014775 0ustar 00000000000000## Copyright (C) 2001 Paul Kienzle
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} harmmean (@var{x})
## @deftypefnx{Function File} harmmean (@var{x}, @var{dim})
## Compute the harmonic mean of @var{x}, optionally along dimension @var{dim}.
##
## This is a thin wrapper: it returns exactly what @code{mean (x, "h")}
## (or @code{mean (x, "h", dim)}) returns.
##
## @seealso{mean}
## @end deftypefn

function a = harmmean(x, dim)

  ## Dispatch on the number of arguments; anything else is a usage error.
  switch (nargin)
    case 1
      a = mean (x, "h");
    case 2
      a = mean (x, "h", dim);
    otherwise
      print_usage;
  endswitch

endfunction
statistics-1.3.0/inst/hist3.m0000755000000000000000000002464512776476211014253 0ustar 00000000000000## Copyright (C) 2015 Carnë Draug
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 3 of the
## License, or (at your option) any later version.
##
## This program is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {} hist3 (@var{X})
## @deftypefnx {Function File} {} hist3 (@var{X}, @var{nbins})
## @deftypefnx {Function File} {} hist3 (@var{X}, @qcode{"Nbins"}, @var{nbins})
## @deftypefnx {Function File} {} hist3 (@var{X}, @var{centers})
## @deftypefnx {Function File} {} hist3 (@var{X}, @qcode{"Ctrs"}, @var{centers})
## @deftypefnx {Function File} {} hist3 (@var{X}, @qcode{"Edges"}, @var{edges})
## @deftypefnx {Function File} {[@var{N}, @var{C}] =} hist3 (@dots{})
## @deftypefnx {Function File} {} hist3 (@dots{}, @var{prop}, @var{val}, @dots{})
## @deftypefnx {Function File} {} hist3 (@var{hax}, @dots{})
## Produce bivariate (2D) histogram counts or plots.
##
## The elements to produce the histogram are taken from the Nx2 matrix
## @var{X}.  Any row containing NaN values is ignored.  The actual bins can
## be configured in 3 different ways: number, centers, or edges of the bins:
##
## @table @asis
## @item Number of bins (default)
## Produces equally spaced bins between the minimum and maximum values
## of @var{X}.  Defined as a 2 element vector, @var{nbins}, one for each
## dimension.  Defaults to @code{[10 10]}.
##
## @item Center of bins
## Defined as a cell array of 2 monotonically increasing vectors,
## @var{centers}.  The width of each bin is determined from the adjacent
## values in the vector with the initial and final bin, extending to Infinity.
##
## @item Edge of bins
## Defined as a cell array of 2 monotonically increasing vectors,
## @var{edges}.
## @code{@var{N}(i,j)} contains the number of elements
## in @var{X} for which:
##
## @itemize @w{}
## @item
## @var{edges}@{1@}(i) <= @var{X}(:,1) < @var{edges}@{1@}(i+1)
## @item
## @var{edges}@{2@}(j) <= @var{X}(:,2) < @var{edges}@{2@}(j+1)
## @end itemize
##
## The consequence of this definition is that values outside the initial
## and final edge values are ignored, and that the final bin only contains
## the number of elements exactly equal to the final edge.
##
## @end table
##
## The return values, @var{N} and @var{C}, are the bin counts and centers
## respectively.  These are especially useful to produce intensity maps:
##
## @example
## [counts, centers] = hist3 (data);
## imagesc (centers@{1@}, centers@{2@}, counts)
## @end example
##
## If there is no output argument, or if the axes graphics handle
## @var{hax} is defined, the function will plot a 3 dimensional bar
## graph.  Any extra property/value pairs are passed directly to the
## underlying surface object.
##
## @seealso{hist, histc, lookup, mesh}
## @end deftypefn

function [N, C] = hist3 (X, varargin)

  ## Remembers whether the deprecation warning for the old "centers"
  ## option name has already been issued in this session.
  persistent warned = false;

  if (nargin < 1)
    print_usage ();
  endif

  ## An optional leading axes handle shifts all remaining arguments by one.
  next_argin = 1;
  should_draw = true;
  if (isaxes (X))
    hax = X;
    X = varargin{next_argin++};
  elseif (nargout == 0)
    hax = gca ();
  else
    should_draw = false;
  endif

  if (! ismatrix (X) || columns (X) != 2)
    error ("hist3: X must be a 2 columns matrix");
  endif

  ## Drop whole rows that contain a NaN.  The row index plus colon is
  ## required: deleting with a single (linear) index would flatten X.
  X(any (isnan (X), 2), :) = [];

  ## Work out the binning method and its value from the next argument(s).
  method = "nbins";
  val = [10 10];
  if (numel (varargin) >= next_argin)
    this_arg = varargin{next_argin++};
    if (isnumeric (this_arg))
      method = "nbins";
      val = this_arg;
    elseif (iscell (this_arg))
      method = "ctrs";
      val = this_arg;
    elseif (numel (varargin) >= next_argin
            && any (strcmpi ({"nbins", "ctrs", "centers", "edges"}, this_arg)))
      method = tolower (this_arg);
      val = varargin{next_argin++};
    else
      ## Not a binning option: leave it for the property/value pairs
      ## that are forwarded to mesh().
      next_argin--;
    endif
  endif

  have_centers = false;
  switch (tolower (method))
    case "nbins"
      [r_edges, c_edges] = edges_from_nbins (X, val);

    case "ctrs"
      have_centers = true;
      centers = val;
      [r_edges, c_edges] = edges_from_centers (val);

    case "centers"
      ## This was supported until 1.2.4 when the Matlab compatible option
      ## 'Ctrs' was added.
      if (! warned)
        warned = true;
        warning ("hist3: option `centers' is deprecated.  Use `ctrs'");
      endif
      have_centers = true;
      centers = val;
      [r_edges, c_edges] = edges_from_centers (val);

    case "edges"
      if (! iscell (val) || numel (val) != 2
          || ! all (cellfun (@isvector, val)))
        error ("hist3: EDGES must be a cell array with 2 vectors");
      endif
      [r_edges] = vec (val{1}, 2);
      [c_edges] = vec (val{2}, 2);
      ## With explicit edges, anything outside the outermost edges is ignored.
      out_rows = any (X < [r_edges(1) c_edges(1)]
                      | X > [r_edges(end) c_edges(end)], 2);
      X(out_rows,:) = [];

    otherwise
      ## we should never get here...
      error ("hist3: invalid binning method `%s'", method);
  endswitch

  ## Bin index of every point along each dimension, then accumulate counts.
  r_idx = lookup (r_edges, X(:,1), "l");
  c_idx = lookup (c_edges, X(:,2), "l");

  counts_size = [numel(r_edges) numel(c_edges)];
  counts = accumarray ([r_idx, c_idx], 1, counts_size);

  if (should_draw)
    counts = counts.';
    z = zeros ((size (counts) +1) *2);
    z(2:end-1,2:end-1) = kron (counts, ones (2, 2));
    ## Setting the values for the end of the histogram bin like this
    ## seems straight wrong but that's how Matlab plots look.
    y = [kron(c_edges, ones (1, 2)) (c_edges(end)*2-c_edges(end-1))([1 1])];
    x = [kron(r_edges, ones (1, 2)) (r_edges(end)*2-r_edges(end-1))([1 1])];
    mesh (hax, x, y, z, "facecolor", [.75 .85 .95], varargin{next_argin:end});
  else
    N = counts;
    if (isargout (2))
      if (! have_centers)
        ## Centers are the edges shifted by half a bin width; the last
        ## bin reuses the width of the one before it.
        C = {(r_edges + [diff(r_edges)([1:end end])]/ 2) ...
             (c_edges + [diff(c_edges)([1:end end])]/ 2)};
      else
        C = centers(:)';
        C{1} = vec (C{1}, 2);
        C{2} = vec (C{2}, 2);
      endif
    endif
  endif

endfunction

## Compute the left edges of NBINS(1) x NBINS(2) equally spaced bins that
## span the range of the two columns of X.
function [r_edges, c_edges] = edges_from_nbins (X, nbins)
  if (! isnumeric (nbins) || numel (nbins) != 2)
    error ("hist3: NBINS must be a 2 element vector");
  endif
  ## Reduce explicitly along dim 1 so a single-row X still yields one
  ## value per column instead of a scalar.
  inits = min (X, [], 1);
  ends  = max (X, [], 1);
  ends -= (ends - inits) ./ vec (nbins, 2);

  r_edges = linspace (inits(1), ends(1), nbins(1));
  c_edges = linspace (inits(2), ends(2), nbins(2));
endfunction

## Convert a cell array of 2 vectors of bin centers into left bin edges:
## each edge but the first is moved halfway towards the previous center.
function [r_edges, c_edges] = edges_from_centers (ctrs)
  if (! iscell (ctrs) || numel (ctrs) != 2 || ! all (cellfun (@isvector, ctrs)))
    error ("hist3: CTRS must be a cell array with 2 vectors");
  endif
  r_edges = vec (ctrs{1}, 2);
  c_edges = vec (ctrs{2}, 2);
  r_edges(2:end) -= diff (r_edges) / 2;
  c_edges(2:end) -= diff (c_edges) / 2;
endfunction

%!demo
%! X = [
%!    1    1
%!    1    1
%!    1   10
%!    1   10
%!    5    5
%!    5    5
%!    5    5
%!    5    5
%!    5    5
%!    7    3
%!    7    3
%!    7    3
%!   10   10
%!   10   10];
%! hist3 (X)

%!test
%! N_exp = [ 0  0  0  5 20
%!           0  0 10 15  0
%!           0 15 10  0  0
%!          20  5  0  0  0];
%!
%! n = 100;
%! x = [1:n]';
%! y = [n:-1:1]';
%! D = [x y];
%! N = hist3 (D, [4 5]);
%! assert (N, N_exp);

%!test
%! N_exp = [0 0 0 0  1
%!          0 0 0 0  1
%!          0 0 0 0  1
%!          1 1 1 1 93];
%!
%! n = 100;
%! x = [1:n]';
%! y = [n:-1:1]';
%! D = [x y];
%! C{1} = [1 1.7 3 4];
%! C{2} = [1:5];
%! N = hist3 (D, C);
%! assert (N, N_exp);

## bug 44987
%!test
%! D = [1 1; 3 1; 3 3; 3 1];
%! [c, nn] = hist3 (D, {0:4, 0:4});
%! exp_c = zeros (5);
%! exp_c([7 9 19]) = [1 2 1];
%! assert (c, exp_c);
%! assert (nn, {0:4, 0:4});

## rows containing NaN are dropped as whole rows
%!assert (hist3 ([1 1; NaN 2; 2 2], [2 2]), eye (2))

%!test
%! for i = 10
%!   assert (size (hist3 (rand (9, 2), "Edges", {[0:.2:1]; [0:.2:1]})), [6 6])
%!
endfor %!test %! edge_1 = linspace (0, 10, 10); %! edge_2 = linspace (0, 50, 10); %! [c, nn] = hist3 ([1:10; 1:5:50]', "Edges", {edge_1, edge_2}); %! exp_c = zeros (10, 10); %! exp_c([1 12 13 24 35 46 57 68 79 90]) = 1; %! assert (c, exp_c); %! %! assert (nn{1}, edge_1 + edge_1(2)/2, eps*10^4) %! assert (nn{2}, edge_2 + edge_2(2)/2, eps*10^4) %!shared X %! X = [ %! 5 2 %! 5 3 %! 1 4 %! 5 3 %! 4 4 %! 1 2 %! 2 3 %! 3 3 %! 5 4 %! 5 3]; %!test %! N = zeros (10); %! N([1 10 53 56 60 91 98 100]) = [1 1 1 1 3 1 1 1]; %! C = {(1.2:0.4:4.8), (2.1:0.2:3.9)}; %! assert (nthargout ([1 2], @hist3, X), {N C}, eps*10^3) %!test %! N = zeros (5, 7); %! N([1 5 17 18 20 31 34 35]) = [1 1 1 1 3 1 1 1]; %! C = {(1.4:0.8:4.6), ((2+(1/7)):(2/7):(4-(1/7)))}; %! assert (nthargout ([1 2], @hist3, X, [5 7]), {N C}, eps*10^3) %! assert (nthargout ([1 2], @hist3, X, "Nbins", [5 7]), {N C}, eps*10^3) %!test %! N = [0 1 0; 0 1 0; 0 0 1; 0 0 0]; %! C = {(2:5), (2.5:1:4.5)}; %! assert (nthargout ([1 2], @hist3, X, "Edges", {(1.5:4.5), (2:4)}), {N C}) %!test %! N = [0 0 1 0 1 0; 0 0 0 1 0 0; 0 0 1 4 2 0]; %! C = {(1.2:3.2), (0:5)}; %! assert (nthargout ([1 2], @hist3, X, "Ctrs", C), {N C}) %! assert (nthargout ([1 2], @hist3, X, C), {N C}) %!test %! [~, C] = hist3 (rand (10, 2), "Edges", {[0 .05 .15 .35 .55 .95], %! [-1 .05 .07 .2 .3 .5 .89 1.2]}); %! C_exp = {[ 0.025 0.1 0.25 0.45 0.75 1.15], ... %! [-0.475 0.06 0.135 0.25 0.4 0.695 1.045 1.355]}; %! assert (C, C_exp, eps*10^2) ## Test how handling of out of borders is different whether we are ## defining Centers or Edges. %!test %! Xv = repmat ([1:10]', [1 2]); %! %! ## Test Centers %! assert (hist3 (Xv, "Ctrs", {1:10, 1:10}), eye (10)) %! %! N_exp = eye (6); %! N_exp([1 end]) = 3; %! assert (hist3 (Xv, "Ctrs", {3:8, 3:8}), N_exp) %! %! N_exp = zeros (8, 6); %! N_exp([1 2 11 20 29 38 47 48]) = [2 1 1 1 1 1 1 2]; %! assert (hist3 (Xv, "Ctrs", {2:9, 3:8}), N_exp) %! %! ## Test Edges %! assert (hist3 (Xv, "Edges", {1:10, 1:10}), eye (10)) %! 
assert (hist3 (Xv, "Edges", {3:8, 3:8}), eye (6)) %! assert (hist3 (Xv, "Edges", {2:9, 3:8}), [zeros(1, 6); eye(6); zeros(1, 6)]) %! %! N_exp = zeros (14); %! N_exp(3:12, 3:12) = eye (10); %! assert (hist3 (Xv, "Edges", {-1:12, -1:12}), N_exp) %! %! ## Test for Nbins %! assert (hist3 (Xv), eye (10)) %! assert (hist3 (Xv, [10 10]), eye (10)) %! assert (hist3 (Xv, "nbins", [10 10]), eye (10)) %! assert (hist3 (Xv, [5 5]), eye (5) * 2) %! %! N_exp = zeros (7, 5); %! N_exp([1 9 10 18 26 27 35]) = [2 1 1 2 1 1 2]; %! assert (hist3 (Xv, [7 5]), N_exp) statistics-1.3.0/inst/histfit.m0000755000000000000000000000422612776476211014664 0ustar 00000000000000## Copyright (C) 2003 Alberto Terruzzi ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} histfit (@var{data}, @var{nbins}) ## ## Plot histogram with superimposed fitted normal density. ## ## @code{histfit (@var{data}, @var{nbins})} plots a histogram of the values in ## the vector @var{data} using @var{nbins} bars in the histogram. With one input ## argument, @var{nbins} is set to the square root of the number of elements in ## data. 
##
## Example
##
## @example
## histfit (randn (100, 1))
## @end example
##
## @seealso{bar,hist, pareto}
## @end deftypefn

## Author: Alberto Terruzzi
## Version: 1.0
## Created: 3 March 2004

function histfit (data,nbins)

  ## Accept one or two arguments only.
  if (nargin < 1 || nargin > 2)
    print_usage;
  endif

  if (isvector (data) != 1)
    error ("data must be a vector.");
  endif

  ## Number of usable (non-NaN) observations; also drives the default
  ## number of bins.
  row = sum (! isnan (data));
  if (nargin < 2)
    nbins = ceil (sqrt (row));
  endif

  [counts, centers] = hist (data, nbins);
  ## The density can only be rescaled onto the bars if every bin has the
  ## same width, i.e. the second difference of the centers is ~0.
  if (any (abs (diff (centers, 2)) > 10 * max (abs (centers)) * eps))
    error("histfit bins must be uniform width");
  endif

  ## Normal distribution parameters estimated from the data, ignoring NaN.
  mu_hat = nanmean (data);
  sigma_hat = nanstd (data);

  ## Evaluation grid: +/- 3 standard deviations in steps of sigma/10.
  grid = (-3*sigma_hat+mu_hat : 0.1*sigma_hat : 3*sigma_hat+mu_hat)';

  ## Outline of the histogram bars.
  [xb, yb] = bar (centers, counts);

  ## Fitted density, scaled by sample size and bin width so that it
  ## overlays the count histogram.
  dens = normpdf (grid, mu_hat, sigma_hat);
  binwidth = centers(2) - centers(1);
  dens = row * dens * binwidth;

  ## Histogram in blue with the density line in red on top.
  plot (xb, yb, ";;b", grid, dens, ";;r-");

endfunction
statistics-1.3.0/inst/hmmestimate.m0000755000000000000000000003254412776476211015533 0ustar 00000000000000## Copyright (C) 2006, 2007 Arno Onken
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.
## -*- texinfo -*- ## @deftypefn {Function File} {[@var{transprobest}, @var{outprobest}] =} hmmestimate (@var{sequence}, @var{states}) ## @deftypefnx {Function File} {} hmmestimate (@dots{}, 'statenames', @var{statenames}) ## @deftypefnx {Function File} {} hmmestimate (@dots{}, 'symbols', @var{symbols}) ## @deftypefnx {Function File} {} hmmestimate (@dots{}, 'pseudotransitions', @var{pseudotransitions}) ## @deftypefnx {Function File} {} hmmestimate (@dots{}, 'pseudoemissions', @var{pseudoemissions}) ## Estimate the matrix of transition probabilities and the matrix of output ## probabilities of a given sequence of outputs and states generated by a ## hidden Markov model. The model assumes that the generation starts in ## state @code{1} at step @code{0} but does not include step @code{0} in the ## generated states and sequence. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{sequence} is a vector of a sequence of given outputs. The outputs ## must be integers ranging from @code{1} to the number of outputs of the ## hidden Markov model. ## ## @item ## @var{states} is a vector of the same length as @var{sequence} of given ## states. The states must be integers ranging from @code{1} to the number ## of states of the hidden Markov model. ## @end itemize ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{transprobest} is the matrix of the estimated transition ## probabilities of the states. @code{transprobest(i, j)} is the estimated ## probability of a transition to state @code{j} given state @code{i}. ## ## @item ## @var{outprobest} is the matrix of the estimated output probabilities. ## @code{outprobest(i, j)} is the estimated probability of generating ## output @code{j} given state @code{i}. ## @end itemize ## ## If @code{'symbols'} is specified, then @var{sequence} is expected to be a ## sequence of the elements of @var{symbols} instead of integers. ## @var{symbols} can be a cell array. 
##
## If @code{'statenames'} is specified, then @var{states} is expected to be
## a sequence of the elements of @var{statenames} instead of integers.
## @var{statenames} can be a cell array.
##
## If @code{'pseudotransitions'} is specified then the integer matrix
## @var{pseudotransitions} is used as an initial number of counted
## transitions.  @code{pseudotransitions(i, j)} is the initial number of
## counted transitions from state @code{i} to state @code{j}.
## @var{transprobest} will have the same size as @var{pseudotransitions}.
## Use this if you have transitions that are very unlikely to occur.
##
## If @code{'pseudoemissions'} is specified then the integer matrix
## @var{pseudoemissions} is used as an initial number of counted outputs.
## @code{pseudoemissions(i, j)} is the initial number of counted outputs
## @code{j} given state @code{i}.  If @code{'pseudotransitions'} is also
## specified then the number of rows of @var{pseudoemissions} must be the
## same as the number of rows of @var{pseudotransitions}.  @var{outprobest}
## will have the same size as @var{pseudoemissions}.  Use this if you have
## outputs or states that are very unlikely to occur.
## ## @subheading Examples ## ## @example ## @group ## transprob = [0.8, 0.2; 0.4, 0.6]; ## outprob = [0.2, 0.4, 0.4; 0.7, 0.2, 0.1]; ## [sequence, states] = hmmgenerate (25, transprob, outprob); ## [transprobest, outprobest] = hmmestimate (sequence, states) ## @end group ## ## @group ## symbols = @{'A', 'B', 'C'@}; ## statenames = @{'One', 'Two'@}; ## [sequence, states] = hmmgenerate (25, transprob, outprob, ## 'symbols', symbols, 'statenames', statenames); ## [transprobest, outprobest] = hmmestimate (sequence, states, ## 'symbols', symbols, ## 'statenames', statenames) ## @end group ## ## @group ## pseudotransitions = [8, 2; 4, 6]; ## pseudoemissions = [2, 4, 4; 7, 2, 1]; ## [sequence, states] = hmmgenerate (25, transprob, outprob); ## [transprobest, outprobest] = hmmestimate (sequence, states, 'pseudotransitions', pseudotransitions, 'pseudoemissions', pseudoemissions) ## @end group ## @end example ## ## @subheading References ## ## @enumerate ## @item ## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics ## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC, ## 2001. ## ## @item ## Lawrence R. Rabiner. A Tutorial on Hidden Markov Models and Selected ## Applications in Speech Recognition. @cite{Proceedings of the IEEE}, ## 77(2), pages 257-286, February 1989. 
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Estimation of a hidden Markov model for a given sequence

## Estimate transition and output probabilities by counting the observed
## transitions and outputs in SEQUENCE and STATES (row or column vectors of
## equal length), optionally starting from pseudo counts, and normalizing
## every row to sum to 1.  A row without any counts remains all zero.
function [transprobest, outprobest] = hmmestimate (sequence, states, varargin)

  # Check arguments
  if (nargin < 2 || mod (length (varargin), 2) != 0)
    print_usage ();
  endif

  len = length (sequence);
  if (length (states) != len)
    error ("hmmestimate: sequence and states must have equal length");
  endif

  # Flag for symbols
  usesym = false;
  # Flag for statenames
  usesn = false;

  # Variables for return values
  transprobest = [];
  outprobest = [];

  # Process varargin
  for i = 1:2:length (varargin)
    # There must be an identifier: 'symbols', 'statenames',
    # 'pseudotransitions' or 'pseudoemissions'
    if (! ischar (varargin{i}))
      print_usage ();
    endif
    # Upper case is also fine
    lowerarg = lower (varargin{i});
    if (strcmp (lowerarg, 'symbols'))
      usesym = true;
      # Use the following argument as symbols
      symbols = varargin{i + 1};
    # The same for statenames
    elseif (strcmp (lowerarg, 'statenames'))
      usesn = true;
      # Use the following argument as statenames
      statenames = varargin{i + 1};
    elseif (strcmp (lowerarg, 'pseudotransitions'))
      # Use the following argument as an initial count for transitions
      transprobest = varargin{i + 1};
      if (! ismatrix (transprobest))
        error ("hmmestimate: pseudotransitions must be a non-empty numeric matrix");
      endif
      if (rows (transprobest) != columns (transprobest))
        error ("hmmestimate: pseudotransitions must be a square matrix");
      endif
    elseif (strcmp (lowerarg, 'pseudoemissions'))
      # Use the following argument as an initial count for outputs
      outprobest = varargin{i + 1};
      if (! ismatrix (outprobest))
        error ("hmmestimate: pseudoemissions must be a non-empty numeric matrix");
      endif
    else
      error ("hmmestimate: expected 'symbols', 'statenames', 'pseudotransitions' or 'pseudoemissions' but found '%s'", varargin{i});
    endif
  endfor

  # Transform sequence from symbols to integers if necessary
  if (usesym)
    # sequenceint is used to build the transformed sequence
    sequenceint = zeros (1, len);
    for i = 1:length (symbols)
      # Positions of symbols(i) in the sequence, forced into a row so that
      # a column-vector sequence does not broadcast against sequenceint
      hit = ismember (sequence, symbols(i))(:)';
      # Only fill positions that are still unassigned, so a symbol that
      # appears a second time in symbols does not change sequenceint
      sequenceint(hit & (sequenceint == 0)) = i;
    endfor
    if (! all (sequenceint))
      # Report the last element that could not be mapped
      index = find (sequenceint == 0, 1, "last");
      error ("hmmestimate: sequence(%d) not in symbols", index);
    endif
    sequence = sequenceint;
  else
    if (! isvector (sequence))
      error ("hmmestimate: sequence must be a non-empty vector");
    endif
    feasible = ismember (sequence, 1:max (sequence));
    if (! all (feasible))
      # Report the last element that is not a positive integer
      index = find (! feasible, 1, "last");
      error ("hmmestimate: sequence(%d) not feasible", index);
    endif
  endif

  # Transform states from statenames to integers if necessary
  if (usesn)
    # statesint is used to build the transformed states
    statesint = zeros (1, len);
    for i = 1:length (statenames)
      # Positions of statenames(i) in states, forced into a row (see above)
      hit = ismember (states, statenames(i))(:)';
      # Only fill positions that are still unassigned, so a statename that
      # appears a second time in statenames does not change statesint
      statesint(hit & (statesint == 0)) = i;
    endfor
    if (! all (statesint))
      index = find (statesint == 0, 1, "last");
      error ("hmmestimate: states(%d) not in statenames", index);
    endif
    states = statesint;
  else
    if (! isvector (states))
      error ("hmmestimate: states must be a non-empty vector");
    endif
    feasible = ismember (states, 1:max (states));
    if (! all (feasible))
      index = find (! feasible, 1, "last");
      error ("hmmestimate: states(%d) not feasible", index);
    endif
  endif

  # Estimate the number of different states as the max of states
  nstate = max (states);
  # Estimate the number of different outputs as the max of sequence
  noutput = max (sequence);

  # transprobest is empty if pseudotransitions is not specified
  if (isempty (transprobest))
    # outprobest is not empty if pseudoemissions is specified
    if (! isempty (outprobest))
      if (nstate > rows (outprobest))
        error ("hmmestimate: not enough rows in pseudoemissions");
      endif
      # The number of states is specified by pseudoemissions
      nstate = rows (outprobest);
    endif
    transprobest = zeros (nstate, nstate);
  else
    if (nstate > rows (transprobest))
      error ("hmmestimate: not enough rows in pseudotransitions");
    endif
    # The number of states is given by pseudotransitions
    nstate = rows (transprobest);
  endif

  # outprobest is empty if pseudoemissions is not specified
  if (isempty (outprobest))
    outprobest = zeros (nstate, noutput);
  else
    if (noutput > columns (outprobest))
      error ("hmmestimate: not enough columns in pseudoemissions");
    endif
    # Number of outputs is specified by pseudoemissions
    noutput = columns (outprobest);
    if (rows (outprobest) != nstate)
      error ("hmmestimate: pseudoemissions must have the same number of rows as pseudotransitions");
    endif
  endif

  # Assume that the model started in state 1
  cstate = 1;
  for i = 1:len
    # Count the number of transitions for each state pair
    transprobest(cstate, states(i)) ++;
    cstate = states (i);
    # Count the number of outputs for each state output pair
    outprobest(cstate, sequence(i)) ++;
  endfor

  # transprobest and outprobest contain counted numbers
  # Each row in transprobest and outprobest should contain estimated
  # probabilities
  # => scale so that the sum is 1
  # A zero row remains zero
  # - for transprobest
  s = sum (transprobest, 2);
  s(s == 0) = 1;
  transprobest = transprobest ./ (s * ones (1, nstate));
  # - for outprobest
  s = sum (outprobest, 2);
  s(s == 0) = 1;
  outprobest = outprobest ./ (s * ones (1, noutput));

endfunction

%!test
%! sequence = [1, 2, 1, 1, 1, 2, 2, 1, 2, 3, 3, 3, 3, 2, 3, 1, 1, 1, 1, 3, 3, 2, 3, 1, 3];
%! states = [1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1];
%! [transprobest, outprobest] = hmmestimate (sequence, states);
%! expectedtransprob = [0.88889, 0.11111; 0.28571, 0.71429];
%! expectedoutprob = [0.16667, 0.33333, 0.50000; 1.00000, 0.00000, 0.00000];
%! assert (transprobest, expectedtransprob, 0.001);
%! assert (outprobest, expectedoutprob, 0.001);

%!test
%! sequence = {'A', 'B', 'A', 'A', 'A', 'B', 'B', 'A', 'B', 'C', 'C', 'C', 'C', 'B', 'C', 'A', 'A', 'A', 'A', 'C', 'C', 'B', 'C', 'A', 'C'};
%! states = {'One', 'One', 'Two', 'Two', 'Two', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'Two', 'Two', 'Two', 'Two', 'One', 'One', 'One', 'One', 'One', 'One'};
%! symbols = {'A', 'B', 'C'};
%! statenames = {'One', 'Two'};
%! [transprobest, outprobest] = hmmestimate (sequence, states, 'symbols', symbols, 'statenames', statenames);
%! expectedtransprob = [0.88889, 0.11111; 0.28571, 0.71429];
%! expectedoutprob = [0.16667, 0.33333, 0.50000; 1.00000, 0.00000, 0.00000];
%! assert (transprobest, expectedtransprob, 0.001);
%! assert (outprobest, expectedoutprob, 0.001);

## column-vector input together with 'symbols' and 'statenames'
%!test
%! [transprobest, outprobest] = hmmestimate ({'A'; 'B'; 'A'}, {'One'; 'Two'; 'One'}, 'symbols', {'A', 'B'}, 'statenames', {'One', 'Two'});
%! assert (transprobest, [0.5, 0.5; 1, 0]);
%! assert (outprobest, [1, 0; 0, 1]);

%!test
%! sequence = [1, 2, 1, 1, 1, 2, 2, 1, 2, 3, 3, 3, 3, 2, 3, 1, 1, 1, 1, 3, 3, 2, 3, 1, 3];
%! states = [1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1];
%! pseudotransitions = [8, 2; 4, 6];
%! pseudoemissions = [2, 4, 4; 7, 2, 1];
%! [transprobest, outprobest] = hmmestimate (sequence, states, 'pseudotransitions', pseudotransitions, 'pseudoemissions', pseudoemissions);
%! expectedtransprob = [0.85714, 0.14286; 0.35294, 0.64706];
%!
expectedoutprob = [0.178571, 0.357143, 0.464286; 0.823529, 0.117647, 0.058824]; %! assert (transprobest, expectedtransprob, 0.001); %! assert (outprobest, expectedoutprob, 0.001); statistics-1.3.0/inst/hmmgenerate.m0000755000000000000000000002102412776476211015501 0ustar 00000000000000## Copyright (C) 2006, 2007 Arno Onken ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{sequence}, @var{states}] =} hmmgenerate (@var{len}, @var{transprob}, @var{outprob}) ## @deftypefnx {Function File} {} hmmgenerate (@dots{}, 'symbols', @var{symbols}) ## @deftypefnx {Function File} {} hmmgenerate (@dots{}, 'statenames', @var{statenames}) ## Generate an output sequence and hidden states of a hidden Markov model. ## The model starts in state @code{1} at step @code{0} but will not include ## step @code{0} in the generated states and sequence. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{len} is the number of steps to generate. @var{sequence} and ## @var{states} will have @var{len} entries each. ## ## @item ## @var{transprob} is the matrix of transition probabilities of the states. ## @code{transprob(i, j)} is the probability of a transition to state ## @code{j} given state @code{i}. ## ## @item ## @var{outprob} is the matrix of output probabilities. ## @code{outprob(i, j)} is the probability of generating output @code{j} ## given state @code{i}. 
## @end itemize ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{sequence} is a vector of length @var{len} of the generated ## outputs. The outputs are integers ranging from @code{1} to ## @code{columns (outprob)}. ## ## @item ## @var{states} is a vector of length @var{len} of the generated hidden ## states. The states are integers ranging from @code{1} to ## @code{columns (transprob)}. ## @end itemize ## ## If @code{'symbols'} is specified, then the elements of @var{symbols} are ## used for the output sequence instead of integers ranging from @code{1} to ## @code{columns (outprob)}. @var{symbols} can be a cell array. ## ## If @code{'statenames'} is specified, then the elements of ## @var{statenames} are used for the states instead of integers ranging from ## @code{1} to @code{columns (transprob)}. @var{statenames} can be a cell ## array. ## ## @subheading Examples ## ## @example ## @group ## transprob = [0.8, 0.2; 0.4, 0.6]; ## outprob = [0.2, 0.4, 0.4; 0.7, 0.2, 0.1]; ## [sequence, states] = hmmgenerate (25, transprob, outprob) ## @end group ## ## @group ## symbols = @{'A', 'B', 'C'@}; ## statenames = @{'One', 'Two'@}; ## [sequence, states] = hmmgenerate (25, transprob, outprob, ## 'symbols', symbols, 'statenames', statenames) ## @end group ## @end example ## ## @subheading References ## ## @enumerate ## @item ## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics ## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC, ## 2001. ## ## @item ## Lawrence R. Rabiner. A Tutorial on Hidden Markov Models and Selected ## Applications in Speech Recognition. @cite{Proceedings of the IEEE}, ## 77(2), pages 257-286, February 1989. 
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Output sequence and hidden states of a hidden Markov model

function [sequence, states] = hmmgenerate (len, transprob, outprob, varargin)

  # Draw len hidden states and len outputs from the hidden Markov model
  # described by transprob and outprob.  The walk begins in state 1, which
  # is not itself reported.  'symbols' / 'statenames' name/value pairs
  # replace the integer outputs / states by user supplied labels.

  # Three mandatory arguments followed by name/value pairs
  if (nargin < 3 || mod (length (varargin), 2) != 0)
    print_usage ();
  endif

  if (! isscalar (len) || len < 0 || round (len) != len)
    error ("hmmgenerate: len must be a non-negative scalar integer");
  endif

  if (! ismatrix (transprob))
    error ("hmmgenerate: transprob must be a non-empty numeric matrix");
  endif
  if (! ismatrix (outprob))
    error ("hmmgenerate: outprob must be a non-empty numeric matrix");
  endif

  # Number of hidden states and number of distinct outputs of the model
  nstate = rows (transprob);
  noutput = columns (outprob);

  # The two matrices must describe the same set of states
  if (columns (transprob) != nstate)
    error ("hmmgenerate: transprob must be a square matrix");
  endif
  if (rows (outprob) != nstate)
    error ("hmmgenerate: outprob must have the same number of rows as transprob");
  endif

  # Label translation is off unless requested below
  usesym = false;
  usesn = false;

  # Walk over the name/value pairs
  for k = 1:2:length (varargin)
    optname = varargin{k};
    # The name has to be a string; its case does not matter
    if (! ischar (optname))
      print_usage ();
    endif
    optval = varargin{k + 1};
    which = find (strcmp (lower (optname), {'symbols', 'statenames'}));
    if (isempty (which))
      error ("hmmgenerate: expected 'symbols' or 'statenames' but found '%s'", optname);
    elseif (which == 1)
      # One symbol per possible output
      if (length (optval) != noutput)
        error ("hmmgenerate: number of symbols does not match number of possible outputs");
      endif
      usesym = true;
      symbols = optval;
    else
      # One name per state
      if (length (optval) != nstate)
        error ("hmmgenerate: number of statenames does not match number of states");
      endif
      usesn = true;
      statenames = optval;
    endif
  endfor

  # Rescale every row so that it sums to 1; an all-zero row stays zero
  rowsum = sum (transprob, 2);
  rowsum(rowsum == 0) = 1;
  transprob = transprob ./ repmat (rowsum, 1, nstate);

  rowsum = sum (outprob, 2);
  rowsum(rowsum == 0) = 1;
  outprob = outprob ./ repmat (rowsum, 1, noutput);

  # Uniform random numbers in (0, 1): first for the state transitions,
  # then for the outputs
  transdraw = rand (1, len);
  outdraw = rand (1, len);

  # Defaults; they survive for rows of transprob / outprob that are all zero
  sequence = ones (1, len);
  states = ones (1, len);

  if (len > 0)
    # Cumulate the probabilities from the right so that a draw can be
    # compared against a whole row at once.  The first column is forced to
    # 1 because a row may have been all zero.
    transcum = fliplr (cumsum (fliplr (transprob), 2));
    transcum(:, 1) = 1;
    outcum = fliplr (cumsum (fliplr (outprob), 2));
    outcum(:, 1) = 1;

    # Start in state 1 without recording it
    current = 1;
    for step = 1:len
      # Compare the random number of this step to the cumulated transition
      # probabilities of the current state to pick the next state
      current = sum (transdraw(step) <= transcum(current, :));
      states(step) = current;
      # Pick the output of the new state in the same way
      sequence(step) = sum (outdraw(step) <= outcum(current, :));
    endfor

    # Replace integers by symbols / statenames if requested
    if (usesym)
      sequence = reshape (symbols(sequence), 1, len);
    endif
    if (usesn)
      states = reshape (statenames(states), 1, len);
    endif
  endif

endfunction

%!test
%! len = 25;
%! transprob = [0.8, 0.2; 0.4, 0.6];
%! outprob = [0.2, 0.4, 0.4; 0.7, 0.2, 0.1];
%! [sequence, states] = hmmgenerate (len, transprob, outprob);
%! assert (length (sequence), len);
%! assert (length (states), len);
%! assert (min (sequence) >= 1);
%! assert (max (sequence) <= columns (outprob));
%! assert (min (states) >= 1);
%! assert (max (states) <= rows (transprob));

%!test
%! len = 25;
%! transprob = [0.8, 0.2; 0.4, 0.6];
%! outprob = [0.2, 0.4, 0.4; 0.7, 0.2, 0.1];
%! symbols = {'A', 'B', 'C'};
%! statenames = {'One', 'Two'};
%! [sequence, states] = hmmgenerate (len, transprob, outprob, 'symbols', symbols, 'statenames', statenames);
%! assert (length (sequence), len);
%! assert (length (states), len);
%! assert (strcmp (sequence, 'A') + strcmp (sequence, 'B') + strcmp (sequence, 'C') == ones (1, len));
%! assert (strcmp (states, 'One') + strcmp (states, 'Two') == ones (1, len));
statistics-1.3.0/inst/hmmviterbi.m0000755000000000000000000002205112776476211015354 0ustar 00000000000000
## Copyright (C) 2006, 2007 Arno Onken
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{vpath} =} hmmviterbi (@var{sequence}, @var{transprob}, @var{outprob})
## @deftypefnx {Function File} {} hmmviterbi (@dots{}, 'symbols', @var{symbols})
## @deftypefnx {Function File} {} hmmviterbi (@dots{}, 'statenames', @var{statenames})
## Use the Viterbi algorithm to find the Viterbi path of a hidden Markov
## model given a sequence of outputs. The model assumes that the generation
## starts in state @code{1} at step @code{0} but does not include step
## @code{0} in the generated states and sequence.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{sequence} is the vector of length @var{len} of given outputs. The
## outputs must be integers ranging from @code{1} to
## @code{columns (outprob)}.
##
## @item
## @var{transprob} is the matrix of transition probabilities of the states.
## @code{transprob(i, j)} is the probability of a transition to state
## @code{j} given state @code{i}.
##
## @item
## @var{outprob} is the matrix of output probabilities.
## @code{outprob(i, j)} is the probability of generating output @code{j}
## given state @code{i}.
## @end itemize
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{vpath} is the vector of the same length as @var{sequence} of the
## estimated hidden states. The states are integers ranging from @code{1} to
## @code{columns (transprob)}.
## @end itemize
##
## If @code{'symbols'} is specified, then @var{sequence} is expected to be a
## sequence of the elements of @var{symbols} instead of integers ranging
## from @code{1} to @code{columns (outprob)}.
## @var{symbols} can be a cell array.
##
## If @code{'statenames'} is specified, then the elements of
## @var{statenames} are used for the states in @var{vpath} instead of
## integers ranging from @code{1} to @code{columns (transprob)}.
## @var{statenames} can be a cell array.
##
## @subheading Examples
##
## @example
## @group
## transprob = [0.8, 0.2; 0.4, 0.6];
## outprob = [0.2, 0.4, 0.4; 0.7, 0.2, 0.1];
## [sequence, states] = hmmgenerate (25, transprob, outprob)
## vpath = hmmviterbi (sequence, transprob, outprob)
## @end group
##
## @group
## symbols = @{'A', 'B', 'C'@};
## statenames = @{'One', 'Two'@};
## [sequence, states] = hmmgenerate (25, transprob, outprob,
##                      'symbols', symbols, 'statenames', statenames)
## vpath = hmmviterbi (sequence, transprob, outprob,
##         'symbols', symbols, 'statenames', statenames)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Lawrence R. Rabiner. A Tutorial on Hidden Markov Models and Selected
## Applications in Speech Recognition. @cite{Proceedings of the IEEE},
## 77(2), pages 257-286, February 1989.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Viterbi path of a hidden Markov model

function vpath = hmmviterbi (sequence, transprob, outprob, varargin)

  # Return the most likely sequence of hidden states for the observed
  # outputs in sequence, given the transition matrix transprob and the
  # output matrix outprob.  'symbols' / 'statenames' name/value pairs map
  # the observed outputs / the returned states to user supplied labels.

  # Check arguments
  if (nargin < 3 || mod (length (varargin), 2) != 0)
    print_usage ();
  endif

  if (! ismatrix (transprob))
    error ("hmmviterbi: transprob must be a non-empty numeric matrix");
  endif
  if (! ismatrix (outprob))
    error ("hmmviterbi: outprob must be a non-empty numeric matrix");
  endif

  len = length (sequence);
  # nstate is the number of states of the hidden Markov model
  nstate = rows (transprob);
  # noutput is the number of different outputs that the hidden Markov model
  # can generate
  noutput = columns (outprob);

  # Check whether transprob and outprob are feasible for a hidden Markov model
  if (columns (transprob) != nstate)
    error ("hmmviterbi: transprob must be a square matrix");
  endif
  if (rows (outprob) != nstate)
    error ("hmmviterbi: outprob must have the same number of rows as transprob");
  endif

  # Flag for symbols
  usesym = false;
  # Flag for statenames
  usesn = false;

  # Process varargin
  for i = 1:2:length (varargin)
    # There must be an identifier: 'symbols' or 'statenames'
    if (! ischar (varargin{i}))
      print_usage ();
    endif
    # Upper case is also fine
    lowerarg = lower (varargin{i});
    if (strcmp (lowerarg, 'symbols'))
      if (length (varargin{i + 1}) != noutput)
        error ("hmmviterbi: number of symbols does not match number of possible outputs");
      endif
      usesym = true;
      # Use the following argument as symbols
      symbols = varargin{i + 1};
    # The same for statenames
    elseif (strcmp (lowerarg, 'statenames'))
      if (length (varargin{i + 1}) != nstate)
        error ("hmmviterbi: number of statenames does not match number of states");
      endif
      usesn = true;
      # Use the following argument as statenames
      statenames = varargin{i + 1};
    else
      error ("hmmviterbi: expected 'symbols' or 'statenames' but found '%s'", varargin{i});
    endif
  endfor

  # Transform sequence from symbols to integers if necessary
  if (usesym)
    # sequenceint is used to build the transformed sequence
    sequenceint = zeros (1, len);
    for i = 1:noutput
      # hits is 1 where the sequence equals symbols(i); i is the integer for
      # that symbol.  Force a row so that a column-vector sequence does not
      # broadcast against the 1-by-len accumulator into a len-by-len matrix.
      hits = reshape (ismember (sequence, symbols(i)), 1, len);
      # We do not want to change sequenceint if the symbol appears a second
      # time in symbols
      if (any ((sequenceint == 0) & hits))
        sequenceint += hits * i;
      endif
    endfor
    if (! all (sequenceint))
      index = max ((sequenceint == 0) .* (1:len));
      error (["hmmviterbi: sequence(" int2str (index) ") not in symbols"]);
    endif
    sequence = sequenceint;
  else
    if (! isvector (sequence) && ! isempty (sequence))
      error ("hmmviterbi: sequence must be a vector");
    endif
    # Every entry must be one of the integers 1..noutput
    inrange = ismember (sequence, 1:noutput);
    if (! all (inrange))
      # Report the last offending position (works for rows and columns)
      index = find (! inrange, 1, "last");
      error (["hmmviterbi: sequence(" int2str (index) ") out of range"]);
    endif
  endif

  # Each row in transprob and outprob should contain log probabilities
  # => scale so that the sum is 1 and convert to log space
  # - for transprob
  s = sum (transprob, 2);
  s(s == 0) = 1;
  transprob = log (transprob ./ (s * ones (1, columns (transprob))));
  # - for outprob
  s = sum (outprob, 2);
  s(s == 0) = 1;
  outprob = log (outprob ./ (s * ones (1, columns (outprob))));

  # Store the path starting from i in spath(i, :)
  spath = ones (nstate, len + 1);
  # Set the first state for each path
  spath(:, 1) = (1:nstate)';
  # Store the probability of path i in spathprob(i)
  spathprob = transprob(1, :);

  # Find the most likely paths for the given output sequence
  for i = 1:len
    # Calculate the new probabilities of the continuation with each state
    nextpathprob = ((spathprob' + outprob(:, sequence(i))) * ones (1, nstate)) + transprob;
    # Find the paths with the highest probabilities
    [spathprob, mindex] = max (nextpathprob);
    # Update spath and spathprob with the new paths
    spath = spath(mindex, :);
    spath(:, i + 1) = (1:nstate)';
  endfor

  # Set vpath to the most likely path
  # We do not want the last state because we do not have an output for it
  [m, mindex] = max (spathprob);
  vpath = spath(mindex, 1:len);

  # Transform vpath into statenames if requested
  if (usesn)
    vpath = reshape (statenames(vpath), 1, len);
  endif

endfunction

%!test
%! sequence = [1, 2, 1, 1, 1, 2, 2, 1, 2, 3, 3, 3, 3, 2, 3, 1, 1, 1, 1, 3, 3, 2, 3, 1, 3];
%! transprob = [0.8, 0.2; 0.4, 0.6];
%!
outprob = [0.2, 0.4, 0.4; 0.7, 0.2, 0.1]; %! vpath = hmmviterbi (sequence, transprob, outprob); %! expected = [1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1]; %! assert (vpath, expected); %!test %! sequence = {'A', 'B', 'A', 'A', 'A', 'B', 'B', 'A', 'B', 'C', 'C', 'C', 'C', 'B', 'C', 'A', 'A', 'A', 'A', 'C', 'C', 'B', 'C', 'A', 'C'}; %! transprob = [0.8, 0.2; 0.4, 0.6]; %! outprob = [0.2, 0.4, 0.4; 0.7, 0.2, 0.1]; %! symbols = {'A', 'B', 'C'}; %! statenames = {'One', 'Two'}; %! vpath = hmmviterbi (sequence, transprob, outprob, 'symbols', symbols, 'statenames', statenames); %! expected = {'One', 'One', 'Two', 'Two', 'Two', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'Two', 'Two', 'Two', 'Two', 'One', 'One', 'One', 'One', 'One', 'One'}; %! assert (vpath, expected); statistics-1.3.0/inst/hygestat.m0000755000000000000000000000746112776476211015046 0ustar 00000000000000## Copyright (C) 2006, 2007 Arno Onken ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{mn}, @var{v}] =} hygestat (@var{t}, @var{m}, @var{n}) ## Compute mean and variance of the hypergeometric distribution. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{t} is the total size of the population of the hypergeometric ## distribution. 
## The elements of @var{t} must be positive natural numbers
##
## @item
## @var{m} is the number of marked items of the hypergeometric distribution.
## The elements of @var{m} must be natural numbers
##
## @item
## @var{n} is the size of the drawn sample of the hypergeometric
## distribution. The elements of @var{n} must be positive natural numbers
## @end itemize
## @var{t}, @var{m}, and @var{n} must be of common size or scalar
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{mn} is the mean of the hypergeometric distribution
##
## @item
## @var{v} is the variance of the hypergeometric distribution
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## t = 4:9;
## m = 0:5;
## n = 1:6;
## [mn, v] = hygestat (t, m, n)
## @end group
##
## @group
## [mn, v] = hygestat (t, m, 2)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Moments of the hypergeometric distribution

function [mn, v] = hygestat (t, m, n)

  # Mean mn and variance v of the hypergeometric distribution with
  # population size t, m marked items and sample size n.  Entries whose
  # parameters are not admissible are returned as NaN.

  # Exactly three arguments are required
  if (nargin != 3)
    print_usage ();
  endif

  # Each non-empty argument has to pass the matrix check
  if (! isempty (t) && ! ismatrix (t))
    error ("hygestat: t must be a numeric matrix");
  endif
  if (! isempty (m) && ! ismatrix (m))
    error ("hygestat: m must be a numeric matrix");
  endif
  if (! isempty (n) && ! ismatrix (n))
    error ("hygestat: n must be a numeric matrix");
  endif

  # Bring the arguments to a common size unless all of them are scalars
  allscalar = isscalar (t) && isscalar (m) && isscalar (n);
  if (! allscalar)
    [retval, t, m, n] = common_size (t, m, n);
    if (retval > 0)
      error ("hygestat: t, m and n must be of common size or scalar");
    endif
  endif

  # Moments; ratio is the fraction of marked items in the population
  ratio = m ./ t;
  mn = (n .* m) ./ t;
  v = (n .* ratio .* (1 - ratio) .* (t - n)) ./ (t - 1);

  # Admissible parameters: t and m non-negative, n positive, all integer
  # valued, and neither m nor n larger than t.  A comparison with NaN is
  # false, so NaN parameters count as inadmissible as well.
  admissible = (t >= 0) & (m >= 0) & (n > 0) ...
               & (t == round (t)) & (m == round (m)) & (n == round (n)) ...
               & (m <= t) & (n <= t);
  invalid = ! admissible;
  mn(invalid) = NaN;
  v(invalid) = NaN;

endfunction

%!test
%! t = 4:9;
%! m = 0:5;
%! n = 1:6;
%! [mn, v] = hygestat (t, m, n);
%! expected_mn = [0.0000, 0.4000, 1.0000, 1.7143, 2.5000, 3.3333];
%! expected_v = [0.0000, 0.2400, 0.4000, 0.4898, 0.5357, 0.5556];
%! assert (mn, expected_mn, 0.001);
%! assert (v, expected_v, 0.001);

%!test
%! t = 4:9;
%! m = 0:5;
%! [mn, v] = hygestat (t, m, 2);
%! expected_mn = [0.0000, 0.4000, 0.6667, 0.8571, 1.0000, 1.1111];
%! expected_v = [0.0000, 0.2400, 0.3556, 0.4082, 0.4286, 0.4321];
%! assert (mn, expected_mn, 0.001);
%! assert (v, expected_v, 0.001);
statistics-1.3.0/inst/iwishpdf.m0000755000000000000000000000545112776476211015028 0ustar 00000000000000
## Copyright (C) 2013 Nir Krakauer
##
## This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along with Octave; see the file COPYING. If not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {} @var{y} = iwishpdf (@var{W}, @var{Tau}, @var{df}, @var{log_y}=false)
## Compute the probability density function of the inverse Wishart distribution
##
## Inputs: A @var{p} x @var{p} matrix @var{W} where to find the PDF and the @var{p} x @var{p} positive definite scale matrix @var{Tau} and scalar degrees of freedom parameter @var{df} characterizing the inverse Wishart distribution. (For the density to be finite, need @var{df} > (@var{p} - 1).)
## If the flag @var{log_y} is set, return the log probability density -- this helps avoid underflow when the numerical value of the density is very small
##
## Output: @var{y} is the probability density of the inverse Wishart(@var{Tau}, @var{df}) distribution at @var{W}.
##
## @seealso{iwishrnd, wishpdf}
## @end deftypefn

## Author: Nir Krakauer
## Description: Compute the probability density function of the inverse Wishart distribution

function [y] = iwishpdf(W, Tau, df, log_y=false)

  if (nargin < 3)
    print_usage ();
  endif

  # p is the dimension of the scale matrix
  p = size(Tau, 1);

  if (df <= (p - 1))
    error('df too small, no finite densities exist')
  endif

  # Logarithm of G_d(df/2), the multivariate gamma function.
  # gammaln evaluates the log-gamma function directly; log (gamma (.))
  # overflows to Inf once the argument exceeds about 171, which made the
  # log density -Inf for large df.
  g = (p * (p-1) / 4) * log(pi);
  for i = 1:p
    g = g + gammaln((df + (1 - i))/2);
  endfor

  C = chol(W);

  # Use formulas for the determinant of a positive definite matrix for
  # better efficiency and numerical accuracy
  logdet_W = 2*sum(log(diag(C)));
  logdet_Tau = 2*sum(log(diag(chol(Tau))));

  # Log density; chol2inv (C) is the inverse of W computed from its factor
  y = -(df*p)/2 * log(2) + (df/2)*logdet_Tau - g - ((df + p + 1)/2)*logdet_W - trace(Tau*chol2inv(C))/2;

  # Return the density itself unless the log density was requested
  if ~log_y
    y = exp(y);
  endif

endfunction

##test results cross-checked against diwish function in R MCMCpack library
%!assert(iwishpdf(4, 3, 3.1), 0.04226595, 1E-7);
%!assert(iwishpdf([2 -0.3;-0.3 4], [1 0.3;0.3 1], 4), 1.60166e-05, 1E-10);
%!assert(iwishpdf([6 2 5; 2 10 -5; 5 -5 25], [9 5 5; 5 10 -8; 5 -8 22], 5.1), 4.946831e-12, 1E-17);

%% Large degrees of freedom must not overflow in the log density
%!assert(isfinite (iwishpdf (4, 3, 400, true)));

%% Test input validation
%!error iwishpdf ()
%!error iwishpdf (1, 2)
%!error iwishpdf (1, 2, 0)

%!error wishpdf (1, 2)
statistics-1.3.0/inst/iwishrnd.m0000755000000000000000000000543712776476211015046 0ustar 00000000000000
## Copyright (C) 2013 Nir Krakauer
##
## This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along with Octave; see the file COPYING. If not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {} [@var{W}[, @var{DI}]] = iwishrnd (@var{Tau}, @var{df}[, @var{DI}][, @var{n}=1])
## Return a random matrix sampled from the inverse Wishart distribution with given parameters
##
## Inputs: the @var{p} x @var{p} positive definite matrix @var{Tau} and scalar degrees of freedom parameter @var{df} (and optionally the transposed Cholesky factor @var{DI} of @var{Sigma} = @code{inv(Tau)}).
## @var{df} can be non-integer as long as @var{df} > @var{d}
##
## Output: a random @var{p} x @var{p} matrix @var{W} from the inverse Wishart(@var{Tau}, @var{df}) distribution. (@code{inv(W)} is from the Wishart(@code{inv(Tau)}, @var{df}) distribution.) If @var{n} > 1, then @var{W} is @var{p} x @var{p} x @var{n} and holds @var{n} such random matrices. (Optionally, the transposed Cholesky factor @var{DI} of @var{Sigma} is also returned.)
##
## Averaged across many samples, the mean of @var{W} should approach @var{Tau} / (@var{df} - @var{p} - 1).
##
## Reference: Yu-Cheng Ku and Peter Bloomfield (2010), Generating Random Wishart Matrices with Fractional Degrees of Freedom in OX, http://www.gwu.edu/~forcpgm/YuChengKu-030510final-WishartYu-ChengKu.pdf
##
## @seealso{wishrnd, iwishpdf}
## @end deftypefn

## Author: Nir Krakauer
## Description: Random matrices from the inverse Wishart distribution

function [W, DI] = iwishrnd(Tau, df, DI, n = 1)

  if (nargin < 2)
    print_usage ();
  endif

  ## D is the Cholesky factor of inv(Tau).  Reuse the transposed factor handed
  ## in by the caller when there is one; otherwise compute it from Tau.
  factor_supplied = (nargin >= 3) && ! isempty (DI);
  if (factor_supplied)
    D = DI';
  else
    try
      D = chol (inv (Tau));
    catch
      error('Cholesky decomposition failed; Tau probably not positive definite')
    end_try_catch
    DI = D';
  endif

  ## Draw n Wishart samples using the factor D; each is inverted below.
  wishart_draws = wishrnd ([], df, D, n);

  ## Preallocate the result stack only when several matrices are requested.
  if (n > 1)
    dim = size (D, 1);
    W = nan (dim, dim, n);
  endif

  for draw = 1:n
    W(:, :, draw) = inv (wishart_draws(:, :, draw));
  endfor

endfunction



%!assert(size (iwishrnd (1,2,1)), [1, 1]);
%!assert(size (iwishrnd ([],2,1)), [1, 1]);
%!assert(size (iwishrnd ([3 1; 1 3], 2.00001, [], 1)), [2, 2]);
%!assert(size (iwishrnd (eye(2), 2, [], 3)), [2, 2, 3]);

%% Test input validation
%!error iwishrnd ()
%!error iwishrnd (1) %!error iwishrnd ([-3 1; 1 3],1) %!error iwishrnd ([1; 1],1) statistics-1.3.0/inst/jackknife.m0000755000000000000000000001177112776476211015142 0ustar 00000000000000## Copyright (C) 2011 Alexander Klein ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn{Function File} {@var{jackstat} =} jackknife (@var{E}, @var{x}, @dots{}) ## Compute jackknife estimates of a parameter taking one or more given samples as parameters. ## In particular, @var{E} is the estimator to be jackknifed as a function name, handle, ## or inline function, and @var{x} is the sample for which the estimate is to be taken. ## The @var{i}-th entry of @var{jackstat} will contain the value of the estimator ## on the sample @var{x} with its @var{i}-th row omitted. ## ## @example ## @group ## jackstat(@var{i}) = @var{E}(@var{x}(1 : @var{i} - 1, @var{i} + 1 : length(@var{x}))) ## @end group ## @end example ## ## Depending on the number of samples to be used, the estimator must have the appropriate form: ## If only one sample is used, then the estimator need not be concerned with cell arrays, ## for example jackknifing the standard deviation of a sample can be performed with ## @code{@var{jackstat} = jackknife (@@std, rand (100, 1))}. 
## If, however, more than one sample is to be used, the samples must all be of equal size,
## and the estimator must address them as elements of a cell-array,
## in which they are aggregated in their order of appearance:
##
## @example
## @group
## @var{jackstat} = jackknife(@@(x) std(x@{1@})/var(x@{2@}), rand (100, 1), randn (100, 1))
## @end group
## @end example
##
## If all goes well, a theoretical value @var{P} for the parameter is already known,
## @var{n} is the sample size,
## @code{@var{t} = @var{n} * @var{E}(@var{x}) - (@var{n} - 1) * mean(@var{jackstat})}, and
## @code{@var{v} = sumsq(@var{n} * @var{E}(@var{x}) - (@var{n} - 1) * @var{jackstat} - @var{t}) / (@var{n} * (@var{n} - 1))}, then
## @code{(@var{t}-@var{P})/sqrt(@var{v})} should follow a t-distribution with @var{n}-1 degrees of freedom.
##
## Jackknifing is a well known method to reduce bias; further details can be found in:
## @itemize @bullet
## @item Rupert G. Miller: The jackknife-a review; Biometrika (1974) 61(1): 1-15; doi:10.1093/biomet/61.1.1
## @item Rupert G. Miller: Jackknifing Variances; Ann. Math. Statist. Volume 39, Number 2 (1968), 567-582; doi:10.1214/aoms/1177698418
## @item M. H. Quenouille: Notes on Bias in Estimation; Biometrika Vol. 43, No. 3/4 (Dec., 1956), pp. 353-360; doi:10.1093/biomet/43.3-4.353
## @end itemize
## @end deftypefn

## Author: Alexander Klein
## Created: 2011-11-25

function jackstat = jackknife ( anEstimator, varargin )

  ## An estimator alone is not enough: at least one sample is needed.
  if ( nargin < 2 )
    print_usage ();
  end

  ## Convert function name to handle if necessary, or throw
  ## an error.  ischar (rather than isascii) is the right test here:
  ## isascii is also true for small numbers, which would then be
  ## handed to str2func.
  if ( !strcmp ( typeinfo ( anEstimator ), "function handle" ) )
    if ( ischar ( anEstimator ) )
      anEstimator = str2func ( anEstimator );
    else
      error ( "Estimators must be passed as function names or handles!" );
    end
  end

  ## Simple jackknifing can be done with a single vector argument, and
  ## first and foremost with a function that does not care about
  ## cell-arrays.
  if ( length ( varargin ) == 1 && isnumeric ( varargin { 1 } ) )

    aSample = varargin { 1 };
    g = length ( aSample );

    jackstat = zeros ( 1, g );

    ## Entry k is the estimate on the sample with its k-th element left out.
    for k = 1 : g
      jackstat ( k ) = anEstimator ( aSample ( [ 1 : k - 1, k + 1 : g ] ) );
    end

  ## More complicated input requires more work, however.
  else

    g = cellfun ( @(x) length ( x ), varargin );

    ## All samples must have the same length as the first one.
    if ( any ( g - g ( 1 ) ) )
      error ( "All passed data must be of equal length!" );
    end

    g = g ( 1 );

    jackstat = zeros ( 1, g );

    ## The estimator receives a cell array holding every sample with its
    ## k-th element left out.
    for k = 1 : g
      jackstat ( k ) = anEstimator ( cellfun ( @(x) x( [ 1 : k - 1, k + 1 : g ] ), varargin, "UniformOutput", false ) );
    end

  end
endfunction


%!test
%! ##Example from Quenouille, Table 1
%! d=[0.18 4.00 1.04 0.85 2.14 1.01 3.01 2.33 1.57 2.19];
%! jackstat = jackknife ( @(x) 1/mean(x), d );
%! assert ( 10 / mean(d) - 9 * mean(jackstat), 0.5240, 1e-5 );

%!demo
%! for k = 1:1000
%!  x=rand(10,1);
%!  s(k)=std(x);
%!  jackstat=jackknife(@std,x);
%!  j(k)=10*std(x) - 9*mean(jackstat);
%! end
%! figure();hist([s',j'], 0:sqrt(1/12)/10:2*sqrt(1/12))

%!demo
%! for k = 1:1000
%!  x=randn(1,50);
%!  y=rand(1,50);
%!  jackstat=jackknife(@(x) std(x{1})/std(x{2}),y,x);
%!  j(k)=50*std(y)/std(x) - 49*mean(jackstat);
%!  v(k)=sumsq((50*std(y)/std(x) - 49*jackstat) - j(k)) / (50 * 49);
%! end
%! t=(j-sqrt(1/12))./sqrt(v);
%! figure();plot(sort(tcdf(t,49)),"-;Almost linear mapping indicates good fit with t-distribution.;")
statistics-1.3.0/inst/jsucdf.m0000755000000000000000000000402512776476211014465 0ustar 00000000000000
## Copyright (C) 2006 Frederick (Rick) A Niles
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE.
## See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see .

## -*- texinfo -*-
## @deftypefn {Function File} {} jsucdf (@var{x}, @var{alpha1}, @var{alpha2})
## For each element of @var{x}, compute the cumulative distribution
## function (CDF) at @var{x} of the Johnson SU distribution with shape parameters
## @var{alpha1} and @var{alpha2}.
##
## Default values are @var{alpha1} = 1, @var{alpha2} = 1.
## @end deftypefn

## Author: Frederick (Rick) A Niles
## Description: CDF of the Johnson SU distribution

## This function is derived from normcdf.m

## This is the TeX equation of this function:
##
## \[ F(x) = \Phi\left(\alpha_1 + \alpha_2
##    \log\left(x + \sqrt{x^2 + 1} \right)\right) \]
##
## where \[ -\infty < x < \infty ; \alpha_2 > 0 \] and $\Phi$ is the
## standard normal cumulative distribution function.  $\alpha_1$ and
## $\alpha_2$ are shape parameters.

function cdf = jsucdf (x, alpha1, alpha2)

  ## Only the one-argument (defaults) and three-argument forms are accepted.
  if (! ((nargin == 1) || (nargin == 3)))
    print_usage;
  endif

  ## Documented defaults for the shape parameters.
  if (nargin == 1)
    alpha1 = 1;
    alpha2 = 1;
  endif

  ## Non-scalar shape parameters must agree in size with x.
  if (!isscalar (alpha1) || !isscalar(alpha2))
    [retval, x, alpha1, alpha2] = common_size (x, alpha1, alpha2);
    if (retval > 0)
      error ("jsucdf: x, alpha1 and alpha2 must be of common size or scalar");
    endif
  endif

  ## asinh (x) equals log (x + sqrt (x^2 + 1)) but does not suffer from
  ## cancellation for large negative x.
  cdf = stdnormal_cdf (alpha1 + alpha2 .* asinh (x));

endfunction
statistics-1.3.0/inst/jsupdf.m0000755000000000000000000000414712776476211014507 0ustar 00000000000000
## Copyright (C) 2006 Frederick (Rick) A Niles
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {} jsupdf (@var{x}, @var{alpha1}, @var{alpha2}) ## For each element of @var{x}, compute the probability density function ## (PDF) at @var{x} of the Johnson SU distribution with shape parameters @var{alpha1} ## and @var{alpha2}. ## ## Default values are @var{alpha1} = 1, @var{alpha2} = 1. ## @end deftypefn ## Author: Frederick (Rick) A Niles ## Description: PDF of Johnson SU distribution ## This function is derived from normpdf.m ## This is the TeX equation of this function: ## ## \[ f(x) = \frac{\alpha_2}{\sqrt{x^2+1}} \phi\left(\alpha_1+\alpha_2 ## \log{\left(x+\sqrt{x^2+1}\right)}\right) \] ## ## where \[ -\infty < x < \infty ; \alpha_2 > 0 \] and $\phi$ is the ## standard normal probability distribution function. $\alpha_1$ and ## $\alpha_2$ are shape parameters. 
function pdf = jsupdf (x, alpha1, alpha2)

  ## Only the one-argument (defaults) and three-argument forms are accepted.
  if (nargin != 1 && nargin != 3)
    print_usage;
  endif

  ## Documented defaults for the shape parameters.
  if (nargin == 1)
    alpha1 = 1;
    alpha2 = 1;
  endif

  ## Non-scalar shape parameters must agree in size with x.
  if (!isscalar (alpha1) || !isscalar(alpha2))
    [retval, x, alpha1, alpha2] = common_size (x, alpha1, alpha2);
    if (retval > 0)
      error ("jsupdf: x, alpha1 and alpha2 must be of common size or scalars");
    endif
  endif

  sr = sqrt (x.*x + 1);
  ## asinh (x) equals log (x + sqrt (x^2 + 1)) but does not suffer from
  ## cancellation for large negative x.
  pdf = (alpha2 ./ sr) .* stdnormal_pdf (alpha1 + alpha2 .* asinh (x));

endfunction
statistics-1.3.0/inst/kmeans.m0000755000000000000000000004132712776476211014473 0ustar 00000000000000
## Copyright (C) 2011 Soren Hauberg
## Copyright (C) 2012 Daniel Ward
## Copyright (C) 2015-2016 Lachlan Andrew
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see .

## -*- texinfo -*-
## @deftypefn {} {[@var{idx}, @var{centers}, @var{sumd}, @var{dist}] =} kmeans (@var{data}, @var{k}, @var{param1}, @var{value1}, @dots{})
## Perform a @var{k}-means clustering of the @var{N}x@var{D} table @var{data}.
## If parameter @qcode{start} is specified, then @var{k} may be empty
## in which case @var{k} is set to the number of rows of @var{start}.
##
## The outputs are:
## @table @code
## @item @var{idx}
## An @var{N}x1 vector whose @var{i}th element is the class to which row @var{i}
## of @var{data} is assigned.
##
## @item @var{centers}
## A @var{K}x@var{D} array whose @var{i}th row is the centroid of cluster
## @var{i}.
## ## @item @var{sumd} ## A @var{k}x1 vector whose @var{i}th entry is the sum of the distances ## from samples in cluster @var{i} to centroid @var{i}. ## ## @item @var{dist} ## An @var{N}x@var{k} matrix whose @var{i}@var{j}th element is ## the distance from sample @var{i} to centroid @var{j}. ## @end table ## ## The following parameters may be placed in any order. Each parameter ## must be followed by its value. ## @table @code ## @item @var{Start} ## The initialization method for the centroids. ## @table @code ## @item @code{plus} ## (Default) The k-means++ algorithm. ## @item @code{sample} ## A subset of @var{k} rows from @var{data}, ## sampled uniformly without replacement. ## @item @code{cluster} ## Perform a pilot clustering on 10% of the rows of @var{data}. ## @item @code{uniform} ## Each component of each centroid is drawn uniformly ## from the interval between the maximum and minimum values of that ## component within @var{data}. ## This performs poorly and is implemented only for Matlab compatibility. ## @item A ## A @var{k}x@var{D}x@var{r} matrix, where @var{r} is the number of ## replicates. ## @end table ## ## @item @var{Replicates} ## A positive integer specifying the number of independent clusterings to ## perform. ## The output values are the values for the best clustering, i.e., ## the one with the smallest value of @var{sumd}. ## If @var{Start} is numeric, then @var{Replicates} defaults to ## (and must equal) the size of the third dimension of @var{Start}. ## Otherwise it defaults to 1. ## ## @item @var{MaxIter} ## The maximum number of iterations to perform for each replicate. ## If the decrease in the summed within-cluster distances is less than 0.001, then ## the replicate terminates even if @var{MaxIter} iterations have not occurred. ## The default is 100. ## ## @item @var{Distance} ## The distance measure used for partitioning and calculating centroids. 
## @table @code
## @item @qcode{sqeuclidean}
## The squared Euclidean distance, i.e.,
## the sum of the squares of the differences between corresponding components.
## In this case, the centroid is the arithmetic mean of all samples in
## its cluster.
## This is the only distance for which this algorithm is truly "k-means".
##
## @item @qcode{cityblock}
## The sum metric, or L1 distance, i.e.,
## the sum of the absolute differences between corresponding components.
## In this case, the centroid is the median of all samples in its cluster.
## This gives the k-medians algorithm.
##
## @item @qcode{cosine}
## (Documentation incomplete.)
##
## @item @qcode{correlation}
## (Documentation incomplete.)
##
## @item @qcode{hamming}
## The number of components in which the sample and the centroid differ.
## In this case, the centroid is the median of all samples in its cluster.
## Unlike Matlab, Octave allows non-logical @var{data}.
##
## @end table
##
## @item @var{EmptyAction}
## What to do when a centroid is not the closest to any data sample.
## @table @code
## @item @qcode{error}
## (Default) Throw an error.
## @item @qcode{singleton}
## Select the row of @var{data} that has the highest error and
## use that as the new centroid.
## @item @qcode{drop}
## Remove the centroid, and continue computation with one fewer centroid.
## The dimensions of the outputs @var{centroids} and @var{d}
## are unchanged, with values for omitted centroids replaced by NA.
##
## @end table
## @end table
##
## Example:
##
##  [~,c] = kmeans (rand(10, 3), 2, "emptyaction", "singleton");
##
## @seealso{linkage}
## @end deftypefn

function [classes, centers, sumd, D] = kmeans (data, k, varargin)

  ## Both the data and the number of clusters (possibly empty) are required.
  if (nargin < 2)
    print_usage ();
  endif

  [reg, prop] = parseparams (varargin);

  ## defaults for options
  emptyaction = "error";
  start       = "plus";
  replicates  = 1;
  max_iter    = 100;
  distance    = "sqeuclidean";

  replicates_set_explicitly = false;

  ## Input checking, validate the matrix.  This is done before the data is
  ## touched so that invalid input produces the kmeans error message.
  if (! isnumeric (data) || ! ismatrix (data) || ! isreal (data))
    error ("kmeans: first input argument must be a NxD real data matrix");
  elseif (! isnumeric (k))
    error ("kmeans: second argument must be numeric");
  endif

  ## Remove rows containing NaN / NA, but record which rows are used
  data_idx      = ! any (isnan (data), 2);
  original_rows = rows (data);
  data          = data(data_idx,:);

  #used for getting the number of samples
  n_rows = rows (data);

  ## Parse options
  while (length (prop) > 0)
    if (length (prop) < 2)
      error ("kmeans: Option '%s' has no argument", prop{1});
    endif
    switch (lower (prop{1}))
      case "emptyaction"
        emptyaction = prop{2};
      case "start"
        start       = prop{2};
      case "maxiter"
        max_iter    = prop{2};
      case "distance"
        distance    = prop{2};

      case "replicates"
        replicates  = prop{2};
        replicates_set_explicitly = true;

      case {"display", "onlinephase", "options"}
        warning ("kmeans: Ignoring unimplemented option '%s'", prop{1});

      otherwise
        error ("kmeans: Unknown option %s", prop{1});
    endswitch
    prop = {prop{3:end}};
  endwhile

  ## Process options

  ## check for the 'emptyaction' property
  switch (emptyaction)
    case {"singleton", "error", "drop"}
      ;
    otherwise
      d = [", " disp(emptyaction)] (1:end-1);  # strip trailing \n
      if (length (d) > 20)
        d = "";
      endif
      error ("kmeans: unsupported empty cluster action parameter%s", d);
  endswitch

  ## check for the 'replicates' property
  if (! isnumeric (replicates) || ! isscalar (replicates)
      || ! isreal (replicates) || replicates < 1)
    d = [", " disp(replicates)] (1:end-1);     # strip trailing \n
    if (length (d) > 20)
      d = "";
    endif
    error ("kmeans: invalid number of replicates%s", d);
  endif

  ## check for the 'MaxIter' property
  if (! isnumeric (max_iter) || ! isscalar (max_iter)
      || ! isreal (max_iter) || max_iter < 1)
    d = [", " disp(max_iter)] (1:end-1);       # strip trailing \n
    if (length (d) > 20)
      d = "";
    endif
    error ("kmeans: invalid MaxIter%s", d);
  endif

  ## check for the 'start' property
  switch (lower (start))
    case {"sample", "plus", "cluster"}
      start = lower (start);
    case {"uniform"}
      start = "uniform";
      min_data = min (data);
      range = max (data) - min_data;
    otherwise
      if (! isnumeric (start))
        d = [", " disp(start)] (1:end-1);       # strip trailing \n
        if (length (d) > 20)
          d = "";
        endif
        error ("kmeans: invalid start parameter%s", d);
      endif
      if (isempty (k))
        k = rows (start);
      elseif (rows (start) != k)
        error ("kmeans: Number of initializers (%d) should match number of centroids (%d)", rows (start), k);
      endif
      if (replicates_set_explicitly)
        if (replicates != size (start, 3))
           error ("kmeans: The third dimension of the initializer (%d) should match the number of replicates (%d)", size (start, 3), replicates);
        endif
      else
        replicates = size (start, 3);
      endif
  endswitch

  ## check for the 'distance' property
  ## dist  returns the distance btwn each row of matrix x and a row vector c
  switch (lower (distance))
    case "sqeuclidean"
      dist     = @(x, c) (sumsq (bsxfun (@minus, x, c), 2));
      centroid = @(x) (mean (x,1));
    case "cityblock"
      dist     = @(x, c) (sum (abs (bsxfun (@minus, x, c)), 2));
      centroid = @(x) (median (x,1));
    case "cosine"
        ## Pre-normalize all data.
        ## (when Octave implements normr, will use  data = normr (data) )
      for i = 1:rows (data)
        data(i,:) = data(i,:) / sqrt (sumsq (data(i,:)));
      endfor
      dist     = @(x, c) (1 - (x * c') ./ sqrt (sumsq (c)));
      centroid = @(x) (mean (x,1));   ## already normalized
    case "correlation"
        ## Pre-normalize all data.
      data = data - mean (data, 2);
        ## (when Octave implements normr, will use  data = normr (data) )
      for i = 1:rows (data)
        data(i,:) = data(i,:) / sqrt (sumsq (data(i,:)));
      endfor
      dist     = @(x, c) (1 - (x * (c-mean (c))')
                             ./ sqrt (sumsq (c-mean (c))));
      centroid = @(x) (mean (x,1));   ## already normalized
    case "hamming"
      dist     = @(x, c) (sum (bsxfun (@ne, x, c), 2));
      centroid = @(x) (median (x,1));
    otherwise
      error ("kmeans: unsupported distance parameter %s", distance);
  endswitch

  ## Done processing options
  ########################################

  ## Now that  k  has been set (possibly by 'replicates' option), check/use it.
  if (! isscalar (k))
    error ("kmeans: second input argument must be a scalar");
  endif

  ## used to hold the distances from each sample to each class
  D = zeros (n_rows, k);

  ## Results of the best replicate seen so far.  Classes and distances are
  ## remembered together with the centers so that all outputs describe the
  ## same clustering.
  best         = Inf;
  best_centers = [];
  best_classes = [];
  best_D       = [];
  for rep = 1:replicates
    ## check for the 'start' property
    switch (lower (start))
      case "sample"
        idx = randperm (n_rows, k);
        centers = data(idx, :);
      case "plus"                  # k-means++, by Arthur and Vassilios(?)
        centers(1,:) = data(randi (n_rows),:);
        d = inf (n_rows, 1);       # Distance to nearest centroid so far
        for i = 2:k
          d = min (d, dist (data, centers(i-1, :)));
          centers(i,:) = data(find (cumsum (d) > rand * sum (d), 1), :);
        endfor
      case "cluster"
        idx = randperm (n_rows, max (k, ceil (n_rows/10)));
        [~, centers] = kmeans (data(idx,:), k, "start", "sample",
                               "distance", distance);
      case "uniform"
        # vectorised 'min_data + range .* rand'
        centers = bsxfun (@plus, min_data,
                          bsxfun (@times, range, rand (k, columns (data))));
      otherwise
        centers = start(:,:,rep);
    endswitch

    ## Run the algorithm
    iter = 1;

    ## Convergence measure.  It has to be reset for every replicate;
    ## otherwise the converged value left over from the previous replicate
    ## would make the loop below exit without a single iteration.
    err = 1;

    ## Classify once before the loop; to set sumd, and  if  max_iter == 0
    ## Compute distances and classify
    for i = 1:k
      D (:, i) = dist (data, centers(i, :));
    endfor
    [~, classes] = min (D, [], 2);
    sumd = obj_cost (D, classes);

    while (err > 0.001 && iter++ <= max_iter)
      ## Calculate new centroids
      replaced_centroids = [];        ## Used by "emptyaction = singleton"
      for i = 1:k
        ## Get binary vector indicating membership in cluster i
        membership = (classes == i);

        ## Check for empty clusters
        if (! any (membership))
          switch emptyaction
            ## if 'singleton', then find the point that is the
            ## farthest from any centroid (and not replacing an empty cluster
            ## from earlier in this pass) and add it to the empty cluster
            case 'singleton'
             available = setdiff(1:n_rows, replaced_centroids);
             [~, idx] = max (min (D(available,:)'));
             idx = available(idx);
             replaced_centroids = [replaced_centroids, idx];

             classes(idx) = i;
             membership(idx)=1;

           ## if 'drop' then set C and D to NA
           case 'drop'
            centers(i,:) = NA;
            D(:,i) = NA;          ## column i holds the distances to centroid i

           ## if 'error' then throw the error
            otherwise
              error ("kmeans: empty cluster created");
          endswitch
       endif ## end check for empty clusters

        ## update the centroids
        if (any (membership))      ## if we didn't "drop" the cluster
          centers(i, :) = centroid (data(membership, :));
        endif
      endfor

      ## Compute distances
      for i = 1:k
        D (:, i) = dist (data, centers(i, :));
      endfor

      ## Classify
      [~, classes] = min (D, [], 2);

      ## calculate the difference in the sum of distances
      new_sumd = obj_cost (D, classes);
      err  = sum (sumd - new_sumd);
      ## update the current sum of distances
      sumd = new_sumd;
    endwhile

    ## The first replicate is always accepted; later ones only when they
    ## lower the total within-cluster distance.
    if (rep == 1 || sum (sumd) < sum (best))
      best         = sumd;
      best_centers = centers;
      best_classes = classes;
      best_D       = D;
    endif
  endfor
  centers = best_centers;
  sumd    = best';
  D       = best_D;

  final_classes = NA (original_rows,1);
  final_classes(data_idx) = best_classes; ## other positions already NaN / NA
  classes = final_classes;

endfunction

## calculate the sum of within-class distances
function obj = obj_cost (D, classes)
  obj = zeros (1,columns (D));
  for i = 1:columns (D)
    idx = (classes == i);
    obj(i) = sum (D(idx,i));
  end
endfunction

## Test input parsing
%!error kmeans (rand (3,2), 4);

%!test
%! samples = 4;
%! dims = 3;
%! k = 2;
%! [cls, c, d, z] = kmeans (rand (samples,dims), k, "start", rand (k,dims, 5),
%!                          "emptyAction", "singleton");
%! assert (size (cls), [samples, 1]);
%! assert (size (c), [k, dims]);
%! assert (size (d), [k, 1]);
%! assert (size (z), [samples, k]);

%!test
%! samples = 4;
%! dims = 3;
%! k = 2;
%! 
[cls, c, d, z] = kmeans (rand (samples,dims), [], "start", rand (k,dims, 5), %! "emptyAction", "singleton"); %! assert (size (cls), [samples, 1]); %! assert (size (c), [k, dims]); %! assert (size (d), [k, 1]); %! assert (size (z), [samples, k]); %!test %! kmeans (rand (4,3), 2, "start", rand (2,3, 5), "replicates", 5, %! "emptyAction", "singleton"); %!error kmeans (rand (4,3), 2, "start", rand (2,3, 5), "replicates", 1); %!error kmeans (rand (4,3), 2, "start", rand (2,2)); %!test %! kmeans (rand (3,4), 2, "start", "sample", "emptyAction", "singleton"); %!test %! kmeans (rand (3,4), 2, "start", "plus", "emptyAction", "singleton"); %!test %! kmeans (rand (3,4), 2, "start", "cluster", "emptyAction", "singleton"); %!test %! kmeans (rand (3,4), 2, "start", "uniform", "emptyAction", "singleton"); %!error kmeans (rand (3,4), 2, "start", "normal"); %!error kmeans (rand (4,3), 2, "replicates", i); %!error kmeans (rand (4,3), 2, "replicates", -1); %!error kmeans (rand (4,3), 2, "replicates", []); %!error kmeans (rand (4,3), 2, "replicates", [1 2]); %!error kmeans (rand (4,3), 2, "replicates", "one"); %!error kmeans (rand (4,3), 2, "MAXITER", i); %!error kmeans (rand (4,3), 2, "MaxIter", -1); %!error kmeans (rand (4,3), 2, "maxiter", []); %!error kmeans (rand (4,3), 2, "maxiter", [1 2]); %!error kmeans (rand (4,3), 2, "maxiter", "one"); %!test %! kmeans (rand (4,3), 2, "distance", "sqeuclidean", "emptyAction", "singleton"); %!test %! kmeans (rand (4,3), 2, "distance", "cityblock", "emptyAction", "singleton"); %!test %! kmeans (rand (4,3), 2, "distance", "cosine", "emptyAction", "singleton"); %!test %! kmeans (rand (4,3), 2, "distance", "correlation", "emptyAction", "singleton"); %!test %! kmeans (rand (4,3), 2, "distance", "hamming", "emptyAction", "singleton"); %!error kmeans (rand (4,3), 2, "distance", "manhattan"); %!error kmeans ([1 0; 1.1 0], 2, "start", eye(2), "emptyaction", "error"); %!test %! 
kmeans ([1 0; 1.1 0], 2, "start", eye(2), "emptyaction", "singleton"); %!test %! [cls, c] = kmeans ([1 0; 2 0], 2, "start", [8,0;0,8], "emptyaction", "drop"); %! assert (cls, [1; 1]); %! assert (c, [1.5, 0; NA, NA]); %!error kmeans ([1 0; 1.1 0], 2, "start", eye(2), "emptyaction", "panic"); %!demo %! ## Generate a two-cluster problem %! C1 = randn (100, 2) + 1; %! C2 = randn (100, 2) - 1; %! data = [C1; C2]; %! %! ## Perform clustering %! [idx, centers] = kmeans (data, 2); %! %! ## Plot the result %! figure; %! plot (data (idx==1, 1), data (idx==1, 2), 'ro'); %! hold on; %! plot (data (idx==2, 1), data (idx==2, 2), 'bs'); %! plot (centers (:, 1), centers (:, 2), 'kv', 'markersize', 10); %! hold off; statistics-1.3.0/inst/linkage.m0000755000000000000000000002301212776476211014616 0ustar 00000000000000## Copyright (C) 2008 Francesco Potortì ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . 
## -*- texinfo -*- ## @deftypefn {Function File} {@var{y} =} linkage (@var{d}) ## @deftypefnx {Function File} {@var{y} =} linkage (@var{d}, @var{method}) ## @deftypefnx {Function File} @ ## {@var{y} =} linkage (@var{x}, @var{method}, @var{metric}) ## @deftypefnx {Function File} @ ## {@var{y} =} linkage (@var{x}, @var{method}, @var{arglist}) ## ## Produce a hierarchical clustering dendrogram ## ## @var{d} is the dissimilarity matrix relative to n observations, ## formatted as a @math{(n-1)*n/2}x1 vector as produced by @code{pdist}. ## Alternatively, @var{x} contains data formatted for input to ## @code{pdist}, @var{metric} is a metric for @code{pdist} and ## @var{arglist} is a cell array containing arguments that are passed to ## @code{pdist}. ## ## @code{linkage} starts by putting each observation into a singleton ## cluster and numbering those from 1 to n. Then it merges two ## clusters, chosen according to @var{method}, to create a new cluster ## numbered n+1, and so on until all observations are grouped into ## a single cluster numbered 2n-1. Row k of the ## (n-1)x3 output matrix relates to cluster n+k: the first ## two columns are the numbers of the two component clusters and column ## 3 contains their distance. ## ## @var{method} defines the way the distance between two clusters is ## computed and how they are recomputed when two clusters are merged: ## ## @table @samp ## @item "single" (default) ## Distance between two clusters is the minimum distance between two ## elements belonging each to one cluster. Produces a cluster tree ## known as minimum spanning tree. ## ## @item "complete" ## Furthest distance between two elements belonging each to one cluster. ## ## @item "average" ## Unweighted pair group method with averaging (UPGMA). ## The mean distance between all pairs of elements each belonging to one ## cluster. ## ## @item "weighted" ## Weighted pair group method with averaging (WPGMA). 
## When two clusters A and B are joined together, the new distance to a ## cluster C is the mean between distances A-C and B-C. ## ## @item "centroid" ## Unweighted Pair-Group Method using Centroids (UPGMC). ## Assumes Euclidean metric. The distance between cluster centroids, ## each centroid being the center of mass of a cluster. ## ## @item "median" ## Weighted pair-group method using centroids (WPGMC). ## Assumes Euclidean metric. Distance between cluster centroids. When ## two clusters are joined together, the new centroid is the midpoint ## between the joined centroids. ## ## @item "ward" ## Ward's sum of squared deviations about the group mean (ESS). ## Also known as minimum variance or inner squared distance. ## Assumes Euclidean metric. How much the moment of inertia of the ## merged cluster exceeds the sum of those of the individual clusters. ## @end table ## ## @strong{Reference} ## Ward, J. H. Hierarchical Grouping to Optimize an Objective Function ## J. Am. Statist. Assoc. 1963, 58, 236-244, ## @url{http://iv.slis.indiana.edu/sw/data/ward.pdf}. ## @end deftypefn ## ## @seealso{pdist,squareform} ## Author: Francesco Potortì function dgram = linkage (d, method = "single", distarg) ## check the input if (nargin < 1) || (nargin > 3) print_usage (); endif if (isempty (d)) error ("linkage: d cannot be empty"); elseif ( nargin < 3 && ~ isvector (d)) error ("linkage: d must be a vector"); endif methods = struct ... ("name", { "single"; "complete"; "average"; "weighted"; "centroid"; "median"; "ward" }, "distfunc", {(@(x) min(x)) # single (@(x) max(x)) # complete (@(x,i,j,w) sum(diag(q=w([i,j]))*x)/sum(q)) # average (@(x) mean(x)) # weighted (@massdist) # centroid (@(x,i) massdist(x,i)) # median (@inertialdist) # ward }); mask = strcmp (lower (method), {methods.name}); if (! 
any (mask)) error ("linkage: %s: unknown method", method); endif dist = {methods.distfunc}{mask}; if (nargin == 3) if (ischar (distarg)) d = pdist (d, distarg); elseif (iscell (distarg)) d = pdist (d, distarg{:}); else print_usage (); endif endif d = squareform (d, "tomatrix"); # dissimilarity NxN matrix n = rows (d); # the number of observations diagidx = sub2ind ([n,n], 1:n, 1:n); # indices of diagonal elements d(diagidx) = Inf; # consider a cluster as far from itself ## For equal-distance nodes, the order in which clusters are ## merged is arbitrary. Rotating the initial matrix produces an ## ordering similar to Matlab's. cname = n:-1:1; # cluster names in d d = rot90 (d, 2); # exchange low and high cluster numbers weight = ones (1, n); # cluster weights dgram = zeros (n-1, 3); # clusters from n+1 to 2*n-1 for cluster = n+1:2*n-1 ## Find the two nearest clusters [m midx] = min (d(:)); [r, c] = ind2sub (size (d), midx); ## Here is the new cluster dgram(cluster-n, :) = [cname(r) cname(c) d(r, c)]; ## Put it in place of the first one and remove the second cname(r) = cluster; cname(c) = []; ## Compute the new distances newd = dist (d([r c], :), r, c, weight); newd(r) = Inf; # take care of the diagonal element ## Put distances in place of the first ones, remove the second ones d(r,:) = newd; d(:,r) = newd'; d(c,:) = []; d(:,c) = []; ## The new weight is the sum of the components' weights weight(r) += weight(c); weight(c) = []; endfor ## Sort the cluster numbers, as Matlab does dgram(:,1:2) = sort (dgram(:,1:2), 2); ## Check that distances are monotonically increasing if (any (diff (dgram(:,3)) < 0)) warning ("clustering", "linkage: cluster distances do not monotonically increase\n\ you should probably use a method different from \"%s\"", method); endif endfunction ## Take two row vectors, which are the Euclidean distances of clusters I ## and J from the others. Column I of second row contains the distance ## between clusters I and J. 
## The centre of gravity of the new cluster
## is on the segment joining the old ones.  W are the weights of all
## clusters.  Use the law of cosines to find the distances of the new
## cluster from all the others.
##
## Called as massdist (X, I) the two merged clusters count equally
## ("median" method); called with all four arguments they count in
## proportion to W(I) and W(J) ("centroid" method).
function y = massdist (x, i, j, w)
  sq = x .^ 2;                          # work on squared Euclidean distances
  if (nargin == 2)
    frac = 0.5;                         # median: equal weights ("weighted")
  else
    frac = 1 / (1 + w(j) / w(i));       # centroid: proportional weights
  endif
  y = sqrt (frac*sq(1,:) + (1-frac)*(sq(2,:) - frac*sq(2,i)));
endfunction

## Distance update for Ward's method.  X holds two row vectors: the
## inertial distances of clusters I and J from all the others, with
## column I of the second row being the inertial distance between I and
## J.  W holds the weights of all clusters.  The merged cluster K has
## its centre of gravity on the segment joining I and J, so the
## inertial distances are first turned into squared Euclidean ones, the
## law of cosines gives the squared Euclidean distance of K from every
## other cluster, and the result is turned back into inertial distances.
function y = inertialdist (x, i, j, w)
  w_i = w(i);                           # weight of cluster I
  w_j = w(j);                           # weight of cluster J
  wsum = [w_i + w; w_j + w];            # sum of weights, every cluster pair
  wprod = [w_i * w; w_j * w];           # product of weights, every pair
  sq = x.^2 .* wsum ./ wprod;           # inertial -> squared Euclidean
  w_k = w_i + w_j;                      # weight of the merged cluster K
  frac = w_i/w_k;                       # normalised weight of I
  ## Squared Euclidean distances between K and all the other clusters
  sq = frac*sq(1,:) + (1-frac)*(sq(2,:) - frac*sq(2,i));
  y = sqrt (sq * w_k .* w ./ (w_k + w)); # squared Euclidean -> inertial
endfunction

%!shared x, t
%! x = reshape(mod(magic(6),5),[],3);
%! t = 1e-6;
%!assert (cond (linkage (pdist (x))),             34.119045,t);
%!assert (cond (linkage (pdist (x), "complete")), 21.793345,t);
%!assert (cond (linkage (pdist (x), "average")),  27.045012,t);
%!assert (cond (linkage (pdist (x), "weighted")), 27.412889,t);
%!
lastwarn(); # Clear last warning before the test %!warning linkage (pdist (x), "centroid"); %!test warning off clustering %! assert (cond (linkage (pdist (x), "centroid")), 27.457477,t); %! warning on clustering %!warning linkage (pdist (x), "median"); %!test warning off clustering %! assert (cond (linkage (pdist (x), "median")), 27.683325,t); %! warning on clustering %!assert (cond (linkage (pdist (x), "ward")), 17.195198,t); %!assert (cond (linkage(x,"ward","euclidean")), 17.195198,t); %!assert (cond (linkage(x,"ward",{"euclidean"})), 17.195198,t); %!assert (cond (linkage(x,"ward",{"minkowski",2})),17.195198,t); statistics-1.3.0/inst/lognstat.m0000755000000000000000000000662612776476211015053 0ustar 00000000000000## Copyright (C) 2006, 2007 Arno Onken ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{m}, @var{v}] =} lognstat (@var{mu}, @var{sigma}) ## Compute mean and variance of the lognormal distribution. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{mu} is the first parameter of the lognormal distribution ## ## @item ## @var{sigma} is the second parameter of the lognormal distribution. 
## @var{sigma} must be positive or zero
## @end itemize
## @var{mu} and @var{sigma} must be of common size or one of them must be
## scalar
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{m} is the mean of the lognormal distribution
##
## @item
## @var{v} is the variance of the lognormal distribution
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## mu = 0:0.2:1;
## sigma = 0.2:0.2:1.2;
## [m, v] = lognstat (mu, sigma)
## @end group
##
## @group
## [m, v] = lognstat (0, sigma)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Moments of the lognormal distribution

function [m, v] = lognstat (mu, sigma)

  # Exactly two parameters are required
  if (nargin != 2)
    print_usage ();
  endif

  # Each parameter is either empty or a matrix
  if (! (isempty (mu) || ismatrix (mu)))
    error ("lognstat: mu must be a numeric matrix");
  endif
  if (! (isempty (sigma) || ismatrix (sigma)))
    error ("lognstat: sigma must be a numeric matrix");
  endif

  # Unless both are scalar, expand the parameters to a common size
  if (! (isscalar (mu) && isscalar (sigma)))
    [status, mu, sigma] = common_size (mu, sigma);
    if (status > 0)
      error ("lognstat: mu and sigma must be of common size or scalar");
    endif
  endif

  # Mean and variance of the lognormal distribution
  sigma_sq = sigma .^ 2;
  m = exp (mu + sigma_sq ./ 2);
  v = (exp (sigma_sq) - 1) .* exp (2 .* mu + sigma_sq);

  # Entries whose sigma is negative, infinite or NaN have no valid moments
  invalid = ! (sigma >= 0) | ! (sigma < Inf);
  m(invalid) = NaN;
  v(invalid) = NaN;

endfunction

%!test
%! mu = 0:0.2:1;
%! sigma = 0.2:0.2:1.2;
%! [m, v] = lognstat (mu, sigma);
%! expected_m = [1.0202, 1.3231, 1.7860, 2.5093, 3.6693, 5.5845];
%! expected_v = [0.0425, 0.3038, 1.3823, 5.6447, 23.1345, 100.4437];
%! assert (m, expected_m, 0.001);
%!
assert (v, expected_v, 0.001); %!test %! sigma = 0.2:0.2:1.2; %! [m, v] = lognstat (0, sigma); %! expected_m = [1.0202, 1.0833, 1.1972, 1.3771, 1.6487, 2.0544]; %! expected_v = [0.0425, 0.2036, 0.6211, 1.7002, 4.6708, 13.5936]; %! assert (m, expected_m, 0.001); %! assert (v, expected_v, 0.001); statistics-1.3.0/inst/mad.m0000755000000000000000000000544012776476211013752 0ustar 00000000000000## Copyright (C) 2001 Paul Kienzle ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} mad (@var{x}) ## @deftypefnx{Function File} mad (@var{x}, @var{flag}) ## @deftypefnx{Function File} mad (@var{x}, @var{flag}, @var{dim}) ## Compute the mean/median absolute deviation of @var{x}. ## ## The mean absolute deviation is computed as ## ## @example ## mean (abs (@var{x} - mean (@var{x}))) ## @end example ## ## and the median absolute deviation is computed as ## ## @example ## median (abs (@var{x} - median (@var{x}))) ## @end example ## ## Elements of @var{x} containing NaN or NA values are ignored during computations. ## ## If @var{flag} is 0, the absolute mean deviation is computed, and if @var{flag} ## is 1, the absolute median deviation is computed. By default @var{flag} is 0. ## ## This is done along the dimension @var{dim} of @var{x}. If this variable is not ## given, the mean/median absolute deviation s computed along the smallest dimension of ## @var{x}. 
##
## @seealso{std}
## @end deftypefn

function a = mad (X, flag = 0, dim = [])

  ## Check input
  if (nargin < 1)
    print_usage ();
  endif
  if (nargin > 3)
    error ("mad: too many input arguments");
  endif

  if (!isnumeric (X))
    error ("mad: first input must be numeric");
  endif

  if (!isscalar (flag))
    error ("mad: second input argument must be a scalar");
  endif

  ## Without an explicit DIM, work along the first dimension of X whose
  ## size is greater than 1, falling back to 1 when there is none.
  if (isempty (dim))
    dim = find (size (X) > 1, 1);
    if (isempty (dim))
      dim = 1;
    endif
  endif
  if (!isscalar (dim))
    error ("mad: dimension argument must be a scalar");
  endif

  ## FLAG == 0 selects the mean, any other value the median; both
  ## helpers skip NaN/NA entries.
  if (flag == 0)
    f = @nanmean;
  else
    f = @nanmedian;
  endif

  ## Compute the mad.  The centre F (X, DIM) has size 1 along DIM and
  ## matches X everywhere else, so bsxfun expands it against X whatever
  ## the shape of X is (vector, matrix or N-d array).
  a = f (abs (bsxfun (@minus, X, f (X, dim))), dim);

endfunction

## Tests
%!assert (mad(1), 0);
%!test
%! X = eye(3); abs_mean = [4/9, 4/9, 4/9]; abs_median=[0,0,0];
%! assert(mad(X), abs_mean, eps);
%! assert(mad(X, 0), abs_mean, eps);
%! assert(mad(X,1), abs_median);
statistics-1.3.0/inst/mahal.m0000644000000000000000000000525012776476211014267 0ustar 00000000000000## Copyright (C) 2015 Lachlan Andrew
##
## This program is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## This program, is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; see the file COPYING.  If not, see
## .

## -*- texinfo -*-
## @deftypefn {Function File} {} mahal (@var{y}, @var{x})
## Mahalanobis' D-square distance.
##
## Return the Mahalanobis' D-square distance of the points in
## @var{y} from the distribution implied by points @var{x}.
##
## Specifically, it uses a Cholesky decomposition to set
##
## @example
##  answer(i) = (@var{y}(i,:) - mean (@var{x})) * inv (A) * (@var{y}(i,:)-mean (@var{x}))'
## @end example
##
## where A is the covariance of @var{x}.
##
## The data @var{x} and @var{y} must have the same number of components
## (columns), but may have a different number of observations (rows).
##
## @end deftypefn

## Author: Lachlan Andrew
## Created: September 2015
## Based on function mahalanobis by Friedrich Leisch

function retval = mahal (y, x)

  if (nargin != 2)
    print_usage ();
  endif

  if (! (isnumeric (x) || islogical (x))
      || ! (isnumeric (y) || islogical (y)))
    error ("mahal: X and Y must be numeric matrices or vectors");
  endif

  ## Check the number of dimensions explicitly so that N-d arrays are
  ## rejected regardless of how ismatrix treats them.
  if (ndims (x) != 2 || ndims (y) != 2)
    error ("mahal: X and Y must be 2-D matrices or vectors");
  endif

  [xr, xc] = size (x);
  [yr, yc] = size (y);

  if (xc != yc)
    error ("mahal: X and Y must have the same number of columns");
  endif

  ## Do all the arithmetic in double precision.  Both inputs must be
  ## converted: subtracting the (double) mean from an integer-typed Y
  ## would otherwise be carried out in integer arithmetic.
  x = double (x);
  y = double (y);

  xm = mean (x, 1);

  ## Center data by subtracting mean of x
  x = bsxfun (@minus, x, xm);
  y = bsxfun (@minus, y, xm);

  ## Sample covariance of x
  w = (x' * x) / (xr - 1);

  ## With R = chol (W), W = R'*R, so sumsq (Y / R, 2) is the row-wise
  ## quadratic form Y * inv (W) * Y'.
  retval = sumsq (y / chol (w), 2);

endfunction

## Test input validation
%!error mahal ()
%!error mahal (1, 2, 3)
%!error mahal ("A", "B")
%!error mahal ([1, 2], ["A", "B"])
%!error mahal (ones (2, 2, 2))
%!error mahal (ones (2, 2), ones (2, 2, 2))
%!error mahal (ones (2, 2), ones (2, 3))

## Integer-typed Y must give the same answer as its double counterpart
%!assert (mahal (int8 ([2 2]), [1 0; 0 1; 1 1; 0 0]), 13.5, 1e-12)

%!test
%! X = [1 0; 0 1; 1 1; 0 0];
%! assert (mahal (X, X), [1.5; 1.5; 1.5; 1.5], eps)
%!
assert (mahal (X, X+1), [7.5; 7.5; 1.5; 13.5], eps) %!assert (mahal ([true; true], [false; true]), [0.5; 0.5], eps) statistics-1.3.0/inst/mnpdf.m0000755000000000000000000000776612776476211014332 0ustar 00000000000000## Copyright (C) 2012 Arno Onken ## ## This program is free software: you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation, either version 3 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program. If not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{y} =} mnpdf (@var{x}, @var{p}) ## Compute the probability density function of the multinomial distribution. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{x} is vector with a single sample of a multinomial distribution with ## parameter @var{p} or a matrix of random samples from multinomial ## distributions. In the latter case, each row of @var{x} is a sample from a ## multinomial distribution with the corresponding row of @var{p} being its ## parameter. ## ## @item ## @var{p} is a vector with the probabilities of the categories or a matrix ## with each row containing the probabilities of a multinomial sample. ## @end itemize ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{y} is a vector of probabilites of the random samples @var{x} from the ## multinomial distribution with corresponding parameter @var{p}. The parameter ## @var{n} of the multinomial distribution is the sum of the elements of each ## row of @var{x}. The length of @var{y} is the number of columns of @var{x}. 
## If a row of @var{p} does not sum to @code{1}, then the corresponding element
## of @var{y} will be @code{NaN}.
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## x = [1, 4, 2];
## p = [0.2, 0.5, 0.3];
## y = mnpdf (x, p);
## @end group
##
## @group
## x = [1, 4, 2; 1, 0, 9];
## p = [0.2, 0.5, 0.3; 0.1, 0.1, 0.8];
## y = mnpdf (x, p);
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC, 2001.
##
## @item
## Merran Evans, Nicholas Hastings and Brian Peacock. @cite{Statistical
## Distributions}. pages 134-136, Wiley, New York, third edition, 2000.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: PDF of the multinomial distribution

function y = mnpdf (x, p)

  # Check arguments
  if (nargin != 2)
    print_usage ();
  endif

  # Counts must be non-negative whole numbers.  The rounding applies to
  # x itself; the rounded values are then compared with the originals.
  if (! ismatrix (x) || any (x(:) < 0 | round (x(:)) != x(:)))
    error ("mnpdf: x must be a matrix of non-negative integer values");
  endif
  if (! ismatrix (p) || any (p(:) < 0))
    error ("mnpdf: p must be a non-empty matrix with rows of probabilities");
  endif

  # Adjust input sizes: when one argument is a matrix, a vector partner
  # is turned into a row and replicated once per row of the matrix
  if (! isvector (x) || ! isvector (p))
    if (isvector (x))
      x = x(:)';
    endif
    if (isvector (p))
      p = p(:)';
    endif
    if (size (x, 1) == 1 && size (p, 1) > 1)
      x = repmat (x, size (p, 1), 1);
    elseif (size (x, 1) > 1 && size (p, 1) == 1)
      p = repmat (p, size (x, 1), 1);
    endif
  endif

  # Continue argument check
  if (! size_equal (x, p))
    error ("mnpdf: x and p must have compatible sizes");
  endif

  # Count total number of elements of each multinomial sample
  n = sum (x, 2);

  # Compute probability density function of the multinomial distribution
  # in log space; categories with a zero count contribute nothing, even
  # when their probability is 0 (where 0 * log (0) would give NaN)
  t = x .* log (p);
  t(x == 0) = 0;
  y = exp (gammaln (n+1) - sum (gammaln (x+1), 2) + sum (t, 2));

  # Set invalid rows to NaN
  k = (abs (sum (p, 2) - 1) > 1e-6);
  y(k) = NaN;

endfunction

## Non-integer counts are rejected
%!error <non-negative integer> mnpdf ([1.5, 4, 2], [0.2, 0.5, 0.3])

%!test
%! x = [1, 4, 2];
%! p = [0.2, 0.5, 0.3];
%!
y = mnpdf (x, p); %! assert (y, 0.11812, 0.001); %!test %! x = [1, 4, 2; 1, 0, 9]; %! p = [0.2, 0.5, 0.3; 0.1, 0.1, 0.8]; %! y = mnpdf (x, p); %! assert (y, [0.11812; 0.13422], 0.001); statistics-1.3.0/inst/mnrnd.m0000755000000000000000000001334612776476211014333 0ustar 00000000000000## Copyright (C) 2012 Arno Onken ## ## This program is free software: you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation, either version 3 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program. If not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{x} =} mnrnd (@var{n}, @var{p}) ## @deftypefnx {Function File} {@var{x} =} mnrnd (@var{n}, @var{p}, @var{s}) ## Generate random samples from the multinomial distribution. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{n} is the first parameter of the multinomial distribution. @var{n} can ## be scalar or a vector containing the number of trials of each multinomial ## sample. The elements of @var{n} must be non-negative integers. ## ## @item ## @var{p} is the second parameter of the multinomial distribution. @var{p} can ## be a vector with the probabilities of the categories or a matrix with each ## row containing the probabilities of a multinomial sample. If @var{p} has ## more than one row and @var{n} is non-scalar, then the number of rows of ## @var{p} must match the number of elements of @var{n}. ## ## @item ## @var{s} is the number of multinomial samples to be generated. @var{s} must ## be a non-negative integer. 
## If @var{s} is specified, then @var{n} must be
## scalar and @var{p} must be a vector.
## @end itemize
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{x} is a matrix of random samples from the multinomial distribution with
## corresponding parameters @var{n} and @var{p}. Each row corresponds to one
## multinomial sample. The number of columns, therefore, corresponds to the
## number of columns of @var{p}. If @var{s} is not specified, then the number
## of rows of @var{x} is the maximum of the number of elements of @var{n} and
## the number of rows of @var{p}. If a row of @var{p} does not sum to @code{1},
## then the corresponding row of @var{x} will contain only @code{NaN} values.
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## n = 10;
## p = [0.2, 0.5, 0.3];
## x = mnrnd (n, p);
## @end group
##
## @group
## n = 10 * ones (3, 1);
## p = [0.2, 0.5, 0.3];
## x = mnrnd (n, p);
## @end group
##
## @group
## n = (1:2)';
## p = [0.2, 0.5, 0.3; 0.1, 0.1, 0.8];
## x = mnrnd (n, p);
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC, 2001.
##
## @item
## Merran Evans, Nicholas Hastings and Brian Peacock. @cite{Statistical
## Distributions}. pages 134-136, Wiley, New York, third edition, 2000.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Random samples from the multinomial distribution

function x = mnrnd (n, p, s)

  # Check arguments
  if (nargin == 3)
    if (! isscalar (n) || n < 0 || round (n) != n)
      error ("mnrnd: n must be a non-negative integer");
    endif
    if (! isvector (p) || any (p < 0 | p > 1))
      error ("mnrnd: p must be a vector of probabilities");
    endif
    if (! isscalar (s) || s < 0 || round (s) != s)
      error ("mnrnd: s must be a non-negative integer");
    endif
  elseif (nargin == 2)
    # A column vector p is a single set of probabilities: make it a row
    if (isvector (p) && size (p, 1) > 1)
      p = p';
    endif
    if (! isvector (n) || any (n < 0 | round (n) != n) || size (n, 2) > 1)
      error ("mnrnd: n must be a non-negative integer column vector");
    endif
    # p may be a matrix here, so test all of its elements at once; the
    # operands of || must reduce to a single logical value
    if (! ismatrix (p) || isempty (p) || any (p(:) < 0 | p(:) > 1))
      error ("mnrnd: p must be a non-empty matrix with rows of probabilities");
    endif
    if (! isscalar (n) && size (p, 1) > 1 && length (n) != size (p, 1))
      error ("mnrnd: the length of n must match the number of rows of p");
    endif
  else
    print_usage ();
  endif

  # Adjust input sizes so that n has one entry, and p one row, per sample
  if (nargin == 3)
    n = n * ones (s, 1);
    p = repmat (p(:)', s, 1);
  elseif (nargin == 2)
    if (isscalar (n) && size (p, 1) > 1)
      n = n * ones (size (p, 1), 1);
    elseif (size (p, 1) == 1)
      p = repmat (p, length (n), 1);
    endif
  endif
  sz = size (p);

  # Upper bounds of categories
  ub = cumsum (p, 2);
  # Make sure that the greatest upper bound is 1; keep the unclamped
  # value so that rows not summing to 1 can be flagged afterwards
  gub = ub(:, end);
  ub(:, end) = 1;
  # Lower bounds of categories
  lb = [zeros(sz(1), 1) ub(:, 1:(end-1))];

  # Draw multinomial samples
  x = zeros (sz);
  for i = 1:sz(1)
    # One uniform random number per trial
    r = rand (n(i), 1);
    # A trial falls into the category whose interval (lb, ub] contains
    # its random number; count the trials per category
    hits = bsxfun (@le, r, ub(i, :)) & bsxfun (@gt, r, lb(i, :));
    x(i, :) = sum (hits, 1);
  endfor
  # Set invalid rows to NaN
  k = (abs (gub - 1) > 1e-6);
  x(k, :) = NaN;

endfunction

## Invalid probabilities are rejected even if only one column has them
%!error <rows of probabilities> mnrnd (5, [0.2, 0.5, 0.3; -0.1, 0.3, 0.8])

%!test
%! n = 10;
%! p = [0.2, 0.5, 0.3];
%! x = mnrnd (n, p);
%! assert (size (x), size (p));
%! assert (all (x >= 0));
%! assert (all (round (x) == x));
%! assert (sum (x) == n);

%!test
%! n = 10 * ones (3, 1);
%! p = [0.2, 0.5, 0.3];
%! x = mnrnd (n, p);
%! assert (size (x), [length(n), length(p)]);
%! assert (all (x >= 0));
%! assert (all (round (x) == x));
%! assert (all (sum (x, 2) == n));

%!test
%! n = (1:2)';
%! p = [0.2, 0.5, 0.3; 0.1, 0.1, 0.8];
%! x = mnrnd (n, p);
%!
assert (size (x), size (p)); %! assert (all (x >= 0)); %! assert (all (round (x) == x)); %! assert (all (sum (x, 2) == n)); statistics-1.3.0/inst/monotone_smooth.m0000755000000000000000000001245612776476211016445 0ustar 00000000000000## Copyright (C) 2011 Nir Krakauer ## Copyright (C) 2011 Carnë Draug ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{yy} =} monotone_smooth (@var{x}, @var{y}, @var{h}) ## Produce a smooth monotone increasing approximation to a sampled functional ## dependence y(x) using a kernel method (an Epanechnikov smoothing kernel is ## applied to y(x); this is integrated to yield the monotone increasing form. ## See Reference 1 for details.) ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{x} is a vector of values of the independent variable. ## ## @item ## @var{y} is a vector of values of the dependent variable, of the same size as ## @var{x}. For best performance, it is recommended that the @var{y} already be ## fairly smooth, e.g. by applying a kernel smoothing to the original values if ## they are noisy. ## ## @item ## @var{h} is the kernel bandwidth to use. If @var{h} is not given, a "reasonable" ## value is computed. ## ## @end itemize ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{yy} is the vector of smooth monotone increasing function values at @var{x}. 
##
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## x = 0:0.1:10;
## y = (x .^ 2) + 3 * randn(size(x)); %typically non-monotonic from the added noise
## ys = ([y(1) y(1:(end-1))] + y + [y(2:end) y(end)])/3; %crudely smoothed via
## moving average, but still typically non-monotonic
## yy = monotone_smooth(x, ys); %yy is monotone increasing in x
## plot(x, y, '+', x, ys, x, yy)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Holger Dette, Natalie Neumeyer and Kay F. Pilz (2006), A simple nonparametric
## estimator of a strictly monotone regression function, @cite{Bernoulli}, 12:469-490
## @item
## Regine Scheder (2007), R Package 'monoProc', Version 1.0-6,
## @url{http://cran.r-project.org/web/packages/monoProc/monoProc.pdf} (The
## implementation here is based on the monoProc function mono.1d)
## @end enumerate
## @end deftypefn

## Author: Nir Krakauer
## Description: Nonparametric monotone increasing regression

function yy = monotone_smooth (x, y, h)

  if (nargin < 2 || nargin > 3)
    print_usage ();
  elseif (!isnumeric (x) || !isvector (x))
    error ("first argument x must be a numeric vector")
  elseif (!isnumeric (y) || !isvector (y))
    error ("second argument y must be a numeric vector")
  elseif (numel (x) != numel (y))
    error ("x and y must have the same number of elements")
  elseif (nargin == 3 && (!isscalar (h) || !isnumeric (h)))
    error ("third argument 'h' (kernel bandwith) must a numeric scalar")
  endif

  n = numel (x);

  %set filter bandwidth at a reasonable default value, if not specified
  if (nargin != 3)
    s = std (x);
    h = s / (n^0.2);
  endif

  x_min = min (x);
  x_max = max (x);

  y_min = min (y);
  y_max = max (y);

  %transform range of x to [0, 1]
  %NOTE: when all elements of x are equal this is 0/0 and xl is NaN
  xl = (x - x_min) / (x_max - x_min);

  yy = ones (size (y));

  %Epanechnikov smoothing kernel (with finite support)
  %K_epanech_kernel = @(z) (3/4) * (1 - z.^2) .* (abs(z) < 1);

  %Integral of the kernel from z up to the end of its support, averaged
  %over the elements of z: 1/2 - (3/4)*(z - z^3/3) inside (-1, 1), 0 for
  %z >= 1 and 1 for z <= -1.  The last term uses "<=" so that the value
  %at exactly z == -1 is 1, which is also the limit of the polynomial
  %there.
  K_epanech_int = @(z) mean (((abs (z) < 1)/2) - (3/4) * (z .* (abs (z) < 1) - (1/3) * (z.^3) .* (abs (z) < 1)) + (z <= -1));

  %integral of kernels up to t
  monotone_inverse = @(t) K_epanech_int ((y - t) / h);

  %the initial bracket [y_min, y_max] and its images are the same for
  %every point, so evaluate them only once
  w_at_ymin = monotone_inverse (y_min);
  w_at_ymax = monotone_inverse (y_max);

  %find the value of the monotone smooth function at each point in x
  niter_max = 150; %maximum number of iterations for estimating each value (should not be reached in most cases)
  for l = 1:n

    tmax = y_max;
    tmin = y_min;
    wmin = w_at_ymin;
    wmax = w_at_ymax;
    if (wmax == wmin)
      %degenerate bracket: nothing to interpolate on
      yy(l) = tmin;
    else
      wt = xl(l);
      iter_max_reached = 1;
      for i = 1:niter_max
        %linear interpolation inside the current bracket
        wt_scaled = (wt - wmin) / (wmax - wmin);
        tn = tmin + wt_scaled * (tmax - tmin);
        wn = monotone_inverse (tn);
        wn_scaled = (wn - wmin) / (wmax - wmin);

        %if (abs(wt-wn) < 1E-4) || (tn < (y_min-0.1)) || (tn > (y_max+0.1))
        %% criterion for break in the R code -- replaced by the following line to
        %% hopefully be less dependent on the scale of y
        if (abs (wt_scaled - wn_scaled) < 1E-4) || (wt_scaled < -0.1) || (wt_scaled > 1.1)
          iter_max_reached = 0;
          break
        endif
        %shrink the bracket towards the target value
        if (wn > wt)
          tmax = tn;
          wmax = wn;
        else
          tmin = tn;
          wmin = wn;
        endif
      endfor
      if (iter_max_reached)
        warning ("at x = %g, maximum number of iterations %d reached without convergence; approximation may not be optimal", x(l), niter_max)
      endif
      yy(l) = tmin + (wt - wmin) * (tmax - tmin) / (wmax - wmin);
    endif
  endfor

endfunction
statistics-1.3.0/inst/mvncdf.m0000755000000000000000000001115012776476211014461 0ustar 00000000000000## Copyright (C) 2008 Arno Onken
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see .

## -*- texinfo -*-
## @deftypefn {Function File} {@var{p} =} mvncdf (@var{x}, @var{mu}, @var{sigma})
## @deftypefnx {Function File} {} mvncdf (@var{a}, @var{x}, @var{mu}, @var{sigma})
## @deftypefnx {Function File} {[@var{p}, @var{err}] =} mvncdf (@dots{})
## Compute the cumulative distribution function of the multivariate
## normal distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{x} is the upper limit for integration where each row corresponds
## to an observation.
##
## @item
## @var{mu} is the mean.
##
## @item
## @var{sigma} is the covariance matrix.
##
## @item
## @var{a} is the lower limit for integration where each row corresponds
## to an observation. @var{a} must have the same size as @var{x}.
## @end itemize
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{p} is the cumulative distribution at each row of @var{x} and
## @var{a}.
##
## @item
## @var{err} is the estimated error.
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## x = [1 2];
## mu = [0.5 1.5];
## sigma = [1.0 0.5; 0.5 1.0];
## p = mvncdf (x, mu, sigma)
## @end group
##
## @group
## a = [-inf 0];
## p = mvncdf (a, x, mu, sigma)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Alan Genz and Frank Bretz. Numerical Computation of Multivariate
## t-Probabilities with Application to Power Calculation of Multiple
## Contrasts. @cite{Journal of Statistical Computation and Simulation},
## 63, pages 361-378, 1999.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: CDF of the multivariate normal distribution

function [p, err] = mvncdf (varargin)

  # Monte-Carlo confidence factor for the standard error: 99 %
  gamma = 2.5;
  # Tolerance
  err_eps = 1e-3;

  # Accepted call forms: (x), (x, mu, sigma) and (a, x, mu, sigma).
  # Without a lower limit the integration starts at -Inf.
  switch (nargin)
    case 1
      x = varargin{1};
      mu = [];
      sigma = eye (size (x, 2));
      a = -Inf .* ones (size (x));
    case 3
      x = varargin{1};
      mu = varargin{2};
      sigma = varargin{3};
      a = -Inf .* ones (size (x));
    case 4
      a = varargin{1};
      x = varargin{2};
      mu = varargin{3};
      sigma = varargin{4};
    otherwise
      print_usage ();
  endswitch

  # Dimension
  q = size (sigma, 1);
  cases = size (x, 1);

  # Default value for mu
  if (isempty (mu))
    mu = zeros (1, q);
  endif

  # Check parameters
  if (size (x, 2) != q)
    error ("mvncdf: x must have the same number of columns as sigma");
  endif

  if (! size_equal (x, a))
    error ("mvncdf: a must have the same size as x");
  endif

  if (isscalar (mu))
    mu = ones (1, q) .* mu;
  elseif (! isvector (mu) || size (mu, 2) != q)
    error ("mvncdf: mu must be a scalar or a vector with the same number of columns as x");
  endif

  # Centre BOTH integration limits, so that the integral is taken
  # against a zero-mean distribution
  x = x - repmat (mu, cases, 1);
  a = a - repmat (mu, cases, 1);

  if (q < 1 || size (sigma, 2) != q || any (any (sigma != sigma')) || min (eig (sigma)) <= 0)
    error ("mvncdf: sigma must be nonempty symmetric positive definite");
  endif

  # Lower triangular Cholesky factor: sigma = c * c'
  c = chol (sigma)';

  # Number of integral transformations
  # NOTE: the running mean starts at n = 1 with p = 0, i.e. the estimate
  # after N samples is sum (fv) / (N + 1)
  n = 1;

  p = zeros (cases, 1);
  varsum = zeros (cases, 1);

  err = ones (cases, 1) .* err_eps;
  # Apply crude Monte-Carlo estimation
  while any (err >= err_eps)
    # Sample from q-1 dimensional unit hypercube
    w = rand (cases, q - 1);

    # Transformation of the multivariate normal integral
    dvev = normcdf ([a(:, 1) / c(1, 1), x(:, 1) / c(1, 1)]);
    dv = dvev(:, 1);
    ev = dvev(:, 2);
    fv = ev - dv;
    y = zeros (cases, q - 1);
    for i = 1:(q - 1)
      y(:, i) = norminv (dv + w(:, i) .* (ev - dv));
      # Contribution of the variables already drawn to variable i+1:
      # sum over j <= i of c(i+1, j) * y(:, j), one value per case
      shift = y(:, 1:i) * c(i + 1, 1:i)';
      dvev = normcdf ([(a(:, i + 1) - shift) ./ c(i + 1, i + 1), (x(:, i + 1) - shift) ./ c(i + 1, i + 1)]);
      dv = dvev(:, 1);
      ev = dvev(:, 2);
      fv = (ev - dv) .* fv;
    endfor

    n++;
    # Estimate standard error
    varsum += (n - 1) .* ((fv - p) .^ 2) ./ n;
    err = gamma .* sqrt (varsum ./ (n .* (n - 1)));
    p += (fv - p) ./ n;
  endwhile

endfunction
statistics-1.3.0/inst/mvnpdf.m0000755000000000000000000000743512776476211014511 0ustar 00000000000000## Author: Paul Kienzle
## This program is granted to the public domain.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{y} =} mvnpdf (@var{x})
## @deftypefnx{Function File} {@var{y} =} mvnpdf (@var{x}, @var{mu})
## @deftypefnx{Function File} {@var{y} =} mvnpdf (@var{x}, @var{mu}, @var{sigma})
## Compute multivariate normal pdf for @var{x} given mean @var{mu} and covariance matrix
## @var{sigma}.  The dimension of @var{x} is @var{d} x @var{p}, @var{mu} is
## @var{1} x @var{p} and @var{sigma} is @var{p} x @var{p}. The normal pdf is
## defined as
##
## @example
## @iftex
## @tex
## $$ 1/y^2 = (2 pi)^p |\Sigma| \exp \{ (x-\mu)^T \Sigma^{-1} (x-\mu) \} $$
## @end tex
## @end iftex
## @ifnottex
## 1/@var{y}^2 = (2 pi)^@var{p} |@var{Sigma}| exp @{ (@var{x}-@var{mu})' inv(@var{Sigma})@
## (@var{x}-@var{mu}) @}
## @end ifnottex
## @end example
##
## @strong{References}
##
## NIST Engineering Statistics Handbook 6.5.4.2
## http://www.itl.nist.gov/div898/handbook/pmc/section5/pmc542.htm
##
## @strong{Algorithm}
##
## Using Cholesky factorization on the positive definite covariance matrix:
##
## @example
## @var{r} = chol (@var{sigma});
## @end example
##
## where @var{r}'*@var{r} = @var{sigma}. Being upper triangular, the determinant
## of @var{r} is trivially the product of the diagonal, and the determinant of
## @var{sigma} is the square of this:
##
## @example
## @var{det} = prod (diag (@var{r}))^2;
## @end example
##
## The formula asks for the square root of the determinant, so no need to
## square it.
function pdf = mvnpdf (x, mu = 0, sigma = 1)

  ## Multivariate normal density evaluated at every row of X.
  ##
  ##   x      d-by-p matrix, one observation per row
  ##   mu     scalar or vector with p elements (default 0)
  ##   sigma  p-by-p covariance matrix; when it is not supplied the
  ##          identity covariance is used
  ##
  ## Returns a d-by-1 column of density values.

  ## Check input
  if (! ismatrix (x))
    error ("mvnpdf: first input must be a matrix");
  endif

  if (! isvector (mu) && ! isscalar (mu))
    error ("mvnpdf: second input must be a real scalar or vector");
  endif

  if (! ismatrix (sigma) || ! issquare (sigma))
    error ("mvnpdf: third input must be a square matrix");
  endif

  [d, p] = size (x);

  ## Only compare sigma against the data when the caller supplied it.
  ## The placeholder default (scalar 1) would otherwise reject every
  ## multi-column X in the documented one- and two-argument call forms.
  if (nargin >= 3 && rows (sigma) != p)
    error ("mvnpdf: dimensions of data and covariance matrix does not match");
  endif

  if (numel (mu) != p && numel (mu) != 1)
    error ("mvnpdf: dimensions of data does not match dimensions of mean value");
  endif

  ## Row-vector mean, replicated for every observation; a scalar mean
  ## with p > 1 is left alone and applied by scalar expansion.
  mu = mu (:).';
  if (all (size (mu) == [1, p]))
    mu = repmat (mu, [d, 1]);
  endif

  if (nargin < 3)
    ## Identity covariance: no factorisation needed
    pdf = (2*pi)^(-p/2) * exp (-sumsq (x-mu, 2)/2);
  else
    ## r'*r = sigma, so prod (diag (r)) is sqrt (det (sigma)) and
    ## sumsq ((x-mu)/r, 2) is the quadratic form (x-mu)*inv(sigma)*(x-mu)'
    r = chol (sigma);
    pdf = (2*pi)^(-p/2) * exp (-sumsq ((x-mu)/r, 2)/2) / prod (diag (r));
  endif

endfunction
function s = mvnrnd (mu, Sigma, K, tol=eps*norm (Sigma, "fro"))

  % Draw random vectors from a multivariate Gaussian.
  %
  %   mu     n-by-d, 1-by-d or scalar mean (a d-by-1 column is transposed)
  %   Sigma  d-by-d covariance matrix
  %   K      number of samples; defaults to the number of rows of mu
  %   tol    jitter added to the diagonal before chol, and (times -100)
  %          the threshold below which an eigenvalue is rejected
  %
  % Returns s, an n-by-d matrix with one sample per row.
  %
  % History:
  %   Iain Murray 2003 -- original version.
  %   May 2004 take a third arg, cases. Makes it more compatible with Matlab's.
  %   Paul Kienzle -- add GPL notice, add docs for argument K.
  %   2012 Juan Pablo Carbajal -- broadcasting; stabilizes chol by perturbing
  %     Sigma with an epsilon multiple of the identity (adds independent noise
  %     of variance epsilon; ref: GPML Rasmussen & Williams 2006, pp 200-201);
  %     tolerance for the positive definite check; chol with option 'upper'.
  %   2014 Nir Krakauer -- tests; scalar mu shared by all elements.

  % perform some input checking
  if (! issquare (Sigma))
    error ('Sigma must be a square covariance matrix.');
  endif

  d = size (Sigma, 1);

  % If mu is column vector and Sigma not a scalar then assume user didn't read
  % help but let them off and flip mu. Don't be more liberal than this or it
  % will encourage errors (eg what should you do if mu is square?).
  if ((size (mu, 2) == 1) && (d != 1))
    mu = mu';
  endif

  if (nargin >= 3)
    n = K;
  else
    n = size (mu, 1); % 1 if mu is scalar
  endif

  if ((! isscalar (mu)) && any (size (mu) != [1, d]) && any (size (mu) != [n, d]))
    error ('mu must be nxd, 1xd, or scalar, where Sigma has dimensions dxd.');
  endif

  % Silence the broadcasting warning for this call only.  "local" restores
  % the caller's setting automatically on return, so the state must NOT be
  % forced back to "on" afterwards: that would override a user who had
  % disabled the warning globally.
  warning ("off", "Octave:broadcast", "local");

  try
    U = chol (Sigma + tol*eye (d), "upper");
  catch
    % chol failed: fall back to the eigen-decomposition Sigma = E*Lambda*E'
    [E, Lambda] = eig (Sigma);
    lowest = min (diag (Lambda));
    if (lowest < -100*tol)
      error ('Sigma must be positive semi-definite. Lowest eigenvalue %g', ...
             lowest);
    else
      % clip tiny negative eigenvalues caused by round-off
      Lambda(Lambda<0) = 0;
    endif
    warning ("mvnrnd:InvalidInput", "Cholesky factorization failed. Using diagonalized matrix.")
    U = sqrt (Lambda) * E';
  end_try_catch

  s = randn (n, d)*U + mu;

endfunction
m = 10; n = 3; rho = 0.4; C = rho*ones(n, n) + (1 - rho)*eye(n); %!assert(size(mvnrnd(0, C, m)), [m n]) %!assert(size(mvnrnd(zeros(1, n), C, m)), [m n]) %!assert(size(mvnrnd(zeros(n, 1), C, m)), [m n]) %!assert(size(mvnrnd(zeros(m, n), C, m)), [m n]) %!assert(size(mvnrnd(zeros(m, n), C)), [m n]) %!assert(size(mvnrnd(zeros(1, n), C)), [1 n]) %!assert(size(mvnrnd(zeros(n, 1), C)), [1 n]) %!error(mvnrnd(zeros(m+1, n), C, m)) %!error(mvnrnd(zeros(1, n+1), C, m)) %!error(mvnrnd(zeros(n+1, 1), C, m)) %!error(mvnrnd(zeros(m, n), eye(n+1), m)) %!error(mvnrnd(zeros(m, n), eye(n+1, n), m)) statistics-1.3.0/inst/mvtcdf.m0000755000000000000000000001123412776476211014472 0ustar 00000000000000## Copyright (C) 2008 Arno Onken ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{p} =} mvtcdf (@var{x}, @var{sigma}, @var{nu}) ## @deftypefnx {Function File} {} mvtcdf (@var{a}, @var{x}, @var{sigma}, @var{nu}) ## @deftypefnx {Function File} {[@var{p}, @var{err}] =} mvtcdf (@dots{}) ## Compute the cumulative distribution function of the multivariate ## Student's t distribution. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{x} is the upper limit for integration where each row corresponds ## to an observation. ## ## @item ## @var{sigma} is the correlation matrix. ## ## @item ## @var{nu} is the degrees of freedom. 
function [p, err] = mvtcdf (varargin)

  # Monte-Carlo estimate of the multivariate Student's t probability
  # between the limits a and x, evaluated for every row of x.
  #
  # Accepted call forms (chosen by the number of arguments):
  #   mvtcdf (x, sigma, nu)       lower limit a defaults to -Inf
  #   mvtcdf (a, x, sigma, nu)
  # Any other argument count prints the usage message.
  #
  #   sigma  correlation matrix; a matrix with non-unit diagonal is
  #          rescaled to unit diagonal before use
  #   nu     degrees of freedom, scalar or one value per row of x
  #
  # Outputs:
  #   p    estimated probability, one entry per row of x
  #   err  estimated error of p (gamma times the running standard error);
  #        sampling continues until every entry is below err_eps

  # Monte-Carlo confidence factor for the standard error: 99 %
  gamma = 2.5;
  # Tolerance
  err_eps = 1e-3;

  switch (length (varargin))
    case 3
      x = varargin{1};
      sigma = varargin{2};
      nu = varargin{3};
      a = -Inf .* ones (size (x));
    case 4
      a = varargin{1};
      x = varargin{2};
      sigma = varargin{3};
      nu = varargin{4};
    otherwise
      print_usage ();
  endswitch

  # Dimension
  q = size (sigma, 1);
  cases = size (x, 1);

  # Check parameters
  if (size (x, 2) != q)
    error ("mvtcdf: x must have the same number of columns as sigma");
  endif

  if (any (size (x) != size (a)))
    error ("mvtcdf: a must have the same size as x");
  endif

  if (! isscalar (nu) && (! isvector (nu) || length (nu) != cases))
    error ("mvtcdf: nu must be a scalar or a vector with the same number of rows as x");
  endif

  # Validate sigma BEFORE normalising it, so that a malformed matrix yields
  # this message rather than a nonconformant-operand error further down.
  if (q < 1 || size (sigma, 2) != q || any (any (sigma != sigma')) || min (eig (sigma)) <= 0)
    error ("mvtcdf: sigma must be nonempty symmetric positive definite");
  endif

  # Convert to correlation matrix if necessary
  if (any (diag (sigma) != 1))
    svar = repmat (diag (sigma), 1, q);
    sigma = sigma ./ sqrt (svar .* svar');
  endif

  nu = nu(:);
  # Lower-triangular Cholesky factor: sigma = c * c'
  c = chol (sigma)';

  # Number of integral transformations
  n = 1;

  p = zeros (cases, 1);
  varsum = zeros (cases, 1);

  err = ones (cases, 1) .* err_eps;
  # Apply crude Monte-Carlo estimation
  while any (err >= err_eps)
    # Sample from q-1 dimensional unit hypercube
    w = rand (cases, q - 1);

    # Transformation of the multivariate t-integral.  The two limits are
    # passed to tcdf separately so that a per-row nu (cases x 1) matches
    # the size of the first argument.
    dv = tcdf (a(:, 1) / c(1, 1), nu);
    ev = tcdf (x(:, 1) / c(1, 1), nu);
    fv = ev - dv;
    y = zeros (cases, q - 1);
    for i = 1:(q - 1)
      y(:, i) = tinv (dv + w(:, i) .* (ev - dv), nu + i - 1) .* sqrt ((nu + sum (y(:, 1:(i-1)) .^ 2, 2)) ./ (nu + i - 1));
      tf = (sqrt ((nu + i) ./ (nu + sum (y(:, 1:i) .^ 2, 2)))) ./ c(i + 1, i + 1);
      # Contribution of the already-sampled coordinates to coordinate i+1:
      # sum over j <= i of c(i+1, j) * y(:, j).  This must be a matrix
      # product (one value per case), not an elementwise product.
      offset = y(:, 1:i) * c(i + 1, 1:i)';
      dv = tcdf ((a(:, i + 1) - offset) .* tf, nu + i);
      ev = tcdf ((x(:, i + 1) - offset) .* tf, nu + i);
      fv = (ev - dv) .* fv;
    endfor

    n++;
    # Estimate standard error
    varsum += (n - 1) .* ((fv - p) .^ 2) ./ n;
    err = gamma .* sqrt (varsum ./ (n .* (n - 1)));
    p += (fv - p) ./ n;
  endwhile

endfunction
function [p] = mvtpdf (x, sigma, nu)

  # Density of the zero-centred multivariate Student's t distribution.
  #
  #   x      n-by-d matrix, one evaluation point per row
  #   sigma  d-by-d symmetric positive definite scale matrix
  #   nu     degrees of freedom, scalar or vector with n elements
  #
  # Returns p, an n-by-1 vector with the density at each row of x.

  if (nargin != 3)
    print_usage ();
  endif

  # Dimensions
  d = size (sigma, 1);
  n = size (x, 1);

  # Check parameters
  if (size (x, 2) != d)
    error ("mvtpdf: x must have the same number of columns as sigma");
  endif
  if (! isscalar (nu) && (! isvector (nu) || numel (nu) != n))
    error ("mvtpdf: nu must be a scalar or a vector with the same number of rows as x");
  endif
  if (d < 1 || size (sigma, 2) != d || ! issymmetric (sigma))
    error ("mvtpdf: sigma must be nonempty and symmetric");
  endif

  try
    U = chol (sigma);
  catch
    error ("mvtpdf: sigma must be positive definite");
  end_try_catch

  nu = nu(:);
  sqrt_det_sigma = prod (diag (U)); # square root of determinant of sigma

  # Ratio gamma((nu+d)/2) / gamma(nu/2), formed in log space: evaluating
  # the two gamma functions directly overflows to Inf/Inf = NaN once nu
  # is large.
  gamma_ratio = exp (gammaln ((nu + d) / 2) - gammaln (nu / 2));

  c = gamma_ratio ./ (sqrt_det_sigma * (nu*pi).^(d/2)); # scale factor for PDF

  # note: sumsq (U' \ x') is equivalent to the quadratic form x*inv(sigma)*x'
  p = c ./ ((1 + sumsq (U' \ x') ./ nu') .^ ((nu' + d)/2))';

endfunction
## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see <http://www.gnu.org/licenses/>. ## -*- texinfo -*- ## @deftypefn {Function File} {@var{x} =} mvtrnd (@var{sigma}, @var{nu}) ## @deftypefnx {Function File} {@var{x} =} mvtrnd (@var{sigma}, @var{nu}, @var{n}) ## Generate random samples from the multivariate t-distribution. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{sigma} is the matrix of correlation coefficients. If there are any ## non-unit diagonal elements then @var{sigma} will be normalized, so that the ## resulting covariance of the obtained samples @var{x} follows: ## @code{cov (x) = nu/(nu-2) * sigma ./ (sqrt (diag (sigma) * diag (sigma)'))}. ## In order to obtain samples distributed according to a standard multivariate ## t-distribution, @var{sigma} must be equal to the identity matrix. To generate ## multivariate t-distribution samples @var{x} with arbitrary covariance matrix ## @var{sigma}, the following scaling might be used: ## @code{x = mvtrnd (sigma, nu, n) * diag (sqrt (diag (sigma)))}. ## ## @item ## @var{nu} is the degrees of freedom for the multivariate t-distribution. ## @var{nu} must be a vector with the same number of elements as samples to be ## generated or be scalar. ## ## @item ## @var{n} is the number of rows of the matrix to be generated. @var{n} must be ## a non-negative integer and corresponds to the number of samples to be ## generated. ## @end itemize ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{x} is a matrix of random samples from the multivariate t-distribution ## with @var{n} row samples. 
function x = mvtrnd (sigma, nu, n)

  # Random samples from the multivariate t-distribution.
  #
  #   sigma  symmetric positive definite matrix; rescaled to unit
  #          diagonal when any diagonal element differs from 1
  #   nu     positive degrees of freedom, scalar or one value per sample
  #   n      number of samples (non-negative integer); defaults to the
  #          number of elements of nu
  #
  # Returns x with one sample per row (n rows, one column per row of sigma).

  # Check arguments
  if (nargin < 2)
    print_usage ();
  endif

  # Shape is tested first: comparing a non-square matrix with its transpose,
  # or taking min (eig ([])), would otherwise abort with an unrelated
  # message before this one could be issued.
  if (! ismatrix (sigma) || isempty (sigma) || ! issquare (sigma) ...
      || any (any (sigma != sigma')) || min (eig (sigma)) <= 0)
    error ("mvtrnd: sigma must be a positive definite matrix");
  endif

  if (! isvector (nu) || any (nu <= 0))
    error ("mvtrnd: nu must be a positive scalar or vector");
  endif
  nu = nu(:);

  if (nargin > 2)
    if (! isscalar (n) || n < 0 || round (n) != n)
      error ("mvtrnd: n must be a non-negative integer")
    endif
    if (isscalar (nu))
      # one common value of nu for every sample
      nu = nu * ones (n, 1);
    elseif (length (nu) != n)
      error ("mvtrnd: n must match the length of nu")
    endif
  else
    n = length (nu);
  endif

  # Normalize sigma
  if (any (diag (sigma) != 1))
    sigma = sigma ./ sqrt (diag (sigma) * diag (sigma)');
  endif

  # Dimension
  d = size (sigma, 1);

  # Draw samples: a Gaussian draw scaled row-wise by sqrt (nu / chi2(nu))
  y = mvnrnd (zeros (1, d), sigma, n);
  u = repmat (chi2rnd (nu), 1, d);
  x = y .* sqrt (repmat (nu, 1, d) ./ u);

endfunction
function cdf = nakacdf (x, m, w)

  ## Cumulative distribution function of the Nakagami distribution with
  ## shape parameter M and scale parameter W, evaluated elementwise at X.
  ##
  ## M and W are either both scalars or are expanded together with X to a
  ## common size.  The result has the size of X (after that expansion) and
  ## is of class single when any input is single.
  ##
  ## Elements where X is NaN, or where M or W is not positive, are NaN.

  if (nargin != 3)
    print_usage ();
  endif

  if (! isscalar (m) || ! isscalar (w))
    [retval, x, m, w] = common_size (x, m, w);
    if (retval > 0)
      error ("nakacdf: X, M and W must be of common size or scalars");
    endif
  endif

  if (iscomplex (x) || iscomplex (m) || iscomplex (w))
    error ("nakacdf: X, M and W must not be complex");
  endif

  ## Preallocate the OUTPUT variable.  Without this the result is grown by
  ## the indexed assignments below and can come back with the wrong size or
  ## orientation (e.g. when the last elements of X are not positive).
  if (isa (x, "single") || isa (m, "single") || isa (w, "single"))
    cdf = zeros (size (x), "single");
  else
    cdf = zeros (size (x));
  endif

  k = isnan (x) | ! (m > 0) | ! (w > 0);
  cdf(k) = NaN;

  k = (x == Inf) & (0 < m) & (m < Inf) & (0 < w) & (w < Inf);
  cdf(k) = 1;

  ## Regular case: regularised incomplete gamma function evaluated at
  ## (m/w)*x^2 with parameter m
  k = (0 < x) & (x < Inf) & (0 < m) & (m < Inf) & (0 < w) & (w < Inf);
  if (any (k(:)))
    if (isscalar (m) && isscalar (w))
      cdf(k) = gammainc ((m/w) * x(k).^2, m * ones (size (x(k))));
    else
      cdf(k) = gammainc ((m(k)./w(k)) .* x(k).^2, m(k));
    endif
  endif

endfunction
function inv = nakainv (x, m, w)

  ## Quantile function (inverse CDF) of the Nakagami distribution with
  ## shape parameter M and scale parameter W, evaluated elementwise at the
  ## probabilities X.
  ##
  ## M and W are either both scalars or are expanded together with X to a
  ## common size.  The result has the size of X (after that expansion) and
  ## is of class single when any input is single.
  ##
  ## Elements are NaN where X is NaN or outside [0, 1], or where M or W is
  ## not a finite positive number.

  if (nargin != 3)
    print_usage ();
  endif

  if (! isscalar (m) || ! isscalar (w))
    [retval, x, m, w] = common_size (x, m, w);
    if (retval > 0)
      error ("nakainv: X, M and W must be of common size or scalars");
    endif
  endif

  if (iscomplex (x) || iscomplex (m) || iscomplex (w))
    error ("nakainv: X, M, and W must not be complex");
  endif

  if (isa (x, "single") || isa (m, "single") || isa (w, "single"))
    inv = zeros (size (x), "single");
  else
    inv = zeros (size (x));
  endif

  ## Parameter validity.  The shape must be strictly positive, exactly as
  ## the scale; testing only -Inf < m let non-positive shapes through and
  ## returned 0 or Inf for them instead of NaN.
  ok = (0 < m) & (m < Inf) & (0 < w) & (w < Inf);

  k = isnan (x) | ! (0 <= x) | ! (x <= 1) | ! ok;
  inv(k) = NaN;

  k = (x == 1) & ok;
  inv(k) = Inf;

  ## Regular case: square root of the gamma quantile with shape m and
  ## scale w/m.  (x == 0 with valid parameters keeps the preallocated 0.)
  k = (0 < x) & (x < 1) & ok;
  if (isscalar (m) && isscalar (w))
    inv(k) = sqrt (gaminv (x(k), m, w/m));
  else
    w_gamma = w./m;
    inv(k) = sqrt (gaminv (x(k), m(k), w_gamma(k)));
  endif

endfunction
y = [NaN, NaN, 0, 0.83255461115769769, Inf, NaN, NaN]; %!assert (nakainv (x, ones (1,7), ones (1,7)), y, eps) %!assert (nakainv (x, 1, 1), y, eps) %!assert (nakainv (x, [1, 1, 1, NaN, 1, 1, 1], 1), [y(1:3), NaN, y(5:7)], eps) %!assert (nakainv (x, 1, [1, 1, 1, NaN, 1, 1, 1]), [y(1:3), NaN, y(5:7)], eps) %!assert (nakainv ([x, NaN], 1, 1), [y, NaN], eps) ## Test class of input preserved %!assert (nakainv (single ([x, NaN]), 1, 1), single ([y, NaN])) %!assert (nakainv ([x, NaN], single (1), 1), single ([y, NaN])) %!assert (nakainv ([x, NaN], 1, single (1)), single ([y, NaN])) ## Test input validation %!error nakainv () %!error nakainv (1) %!error nakainv (1,2) %!error nakainv (1,2,3,4) %!error nakainv (ones (3), ones (2), ones(2)) %!error nakainv (ones (2), ones (3), ones(2)) %!error nakainv (ones (2), ones (2), ones(3)) %!error nakainv (i, 2, 2) %!error nakainv (2, i, 2) %!error nakainv (2, 2, i) statistics-1.3.0/inst/nakapdf.m0000644000000000000000000000611712776476211014614 0ustar 00000000000000## Copyright (C) 2016 Dag Lyberg ## Copyright (C) 1995-2015 Kurt Hornik ## ## This file is part of Octave. ## ## Octave is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or (at ## your option) any later version. ## ## Octave is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Octave; see the file COPYING. If not, see ## . ## -*- texinfo -*- ## @deftypefn {} {} nakapdf (@var{x}, @var{m}, @var{w}) ## For each element of @var{x}, compute the probability density function (PDF) ## at @var{x} of the Nakagami distribution with shape parameter @var{m} and ## scale parameter @var{w}. 
function pdf = nakapdf (x, m, w)

  ## Probability density function of the Nakagami distribution with shape
  ## parameter M and scale parameter W, evaluated elementwise at X.
  ##
  ## M and W are either both scalars or are expanded together with X to a
  ## common size.  The result has the size of X (after that expansion) and
  ## is of class single when any input is single.
  ##
  ## Elements are NaN where X is NaN, M is not at least 0.5, or W is not
  ## positive.  The density is 0 for X <= 0 and X == Inf.

  if (nargin != 3)
    print_usage ();
  endif

  if (! isscalar (m) || ! isscalar (w))
    [retval, x, m, w] = common_size (x, m, w);
    if (retval > 0)
      error ("nakapdf: X, M and W must be of common size or scalars");
    endif
  endif

  if (iscomplex (x) || iscomplex (m) || iscomplex (w))
    error ("nakapdf: X, M and W must not be complex");
  endif

  if (isa (x, "single") || isa (m, "single") || isa (w, "single"))
    pdf = zeros (size (x), "single");
  else
    pdf = zeros (size (x));
  endif

  ## The shape parameter is valid from 0.5 upwards (0.5 included).  The same
  ## bound is used for the NaN mask and for the evaluation mask, so that the
  ## verdict on a parameter does not depend on the value of X.
  k = isnan (x) | ! (m >= 0.5) | ! (w > 0);
  pdf(k) = NaN;

  k = (0 < x) & (x < Inf) & (0.5 <= m) & (m < Inf) & (0 < w) & (w < Inf);
  ## Evaluated in log space; gammaln is used instead of log (gamma (m))
  ## because gamma (m) overflows to Inf for large m.
  if (isscalar (m) && isscalar (w))
    pdf(k) = exp (log (2) + m*log (m) - gammaln (m) - ...
                  m*log (w) + (2*m-1) * ...
                  log (x(k)) - (m/w) * x(k).^2);
  else
    pdf(k) = exp (log (2) + m(k).*log (m(k)) - gammaln (m(k)) - ...
                  m(k).*log (w(k)) + (2*m(k)-1) ...
                  .* log (x(k)) - (m(k)./w(k)) .* x(k).^2);
  endif

endfunction
y = [0, 0, 0.73575888234288467, 0.073262555554936715, 0]; %!assert (nakapdf (x, ones (1,5), ones (1,5)), y, eps) %!assert (nakapdf (x, 1, 1), y, eps) %!assert (nakapdf (x, [1, 1, NaN, 1, 1], 1), [y(1:2), NaN, y(4:5)], eps) %!assert (nakapdf (x, 1, [1, 1, NaN, 1, 1]), [y(1:2), NaN, y(4:5)], eps) %!assert (nakapdf ([x, NaN], 1, 1), [y, NaN], eps) ## Test class of input preserved %!assert (nakapdf (single ([x, NaN]), 1, 1), single ([y, NaN])) %!assert (nakapdf ([x, NaN], single (1), 1), single ([y, NaN])) %!assert (nakapdf ([x, NaN], 1, single (1)), single ([y, NaN])) ## Test input validation %!error nakapdf () %!error nakapdf (1) %!error nakapdf (1,2) %!error nakapdf (1,2,3,4) %!error nakapdf (ones (3), ones (2), ones(2)) %!error nakapdf (ones (2), ones (3), ones(2)) %!error nakapdf (ones (2), ones (2), ones(3)) %!error nakapdf (i, 2, 2) %!error nakapdf (2, i, 2) %!error nakapdf (2, 2, i) statistics-1.3.0/inst/nakarnd.m0000644000000000000000000001055112776476211014623 0ustar 00000000000000## Copyright (C) 2016 Dag Lyberg ## Copyright (C) 1995-2015 Kurt Hornik ## ## This file is part of Octave. ## ## Octave is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or (at ## your option) any later version. ## ## Octave is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Octave; see the file COPYING. If not, see ## . 
function rnd = nakarnd (m, w, varargin)
  ## Random samples from the Nakagami distribution with shape M and
  ## scale W.
  ##
  ## M and W must be real and either scalars or arrays of a common size.
  ## The optional trailing arguments give the output size: a single scalar
  ## (square matrix), a row vector of dimensions, or several scalar
  ## dimensions.  Without size arguments the result has the common size of
  ## M and W.
  ##
  ## Samples are drawn as the square root of gamma deviates with shape M
  ## and scale W/M.  Elements whose parameters are not in 0 < M < Inf and
  ## 0 < W < Inf are NaN.

  if (nargin < 2)
    print_usage ();
  endif

  if (! isscalar (m) || ! isscalar (w))
    [retval, m, w] = common_size (m, w);
    if (retval > 0)
      error ("nakarnd: M and W must be of common size or scalars");
    endif
  endif

  if (iscomplex (m) || iscomplex (w))
    error ("nakarnd: M and W must not be complex");
  endif

  ## Work out the requested output size.
  if (nargin == 2)
    sz = size (m);
  elseif (nargin == 3)
    if (isscalar (varargin{1}) && varargin{1} >= 0)
      sz = [varargin{1}, varargin{1}];
    elseif (isrow (varargin{1}) && all (varargin{1} >= 0))
      sz = varargin{1};
    else
      error ("nakarnd: dimension vector must be row vector of non-negative integers");
    endif
  elseif (nargin > 3)
    if (any (cellfun (@(x) (! isscalar (x) || x < 0), varargin)))
      error ("nakarnd: dimensions must be non-negative integers");
    endif
    sz = [varargin{:}];
  endif

  ## Non-scalar parameters must already have the requested size.
  ## (The message previously carried the misspelled prefix "nakagrnd:".)
  if (! isscalar (m) && ! isequal (size (w), sz))
    error ("nakarnd: M and W must be scalar or of size SZ");
  endif

  if (isa (m, "single") || isa (w, "single"))
    cls = "single";
  else
    cls = "double";
  endif

  if (isscalar (m) && isscalar (w))
    if ((0 < m) && (m < Inf) && (0 < w) && (w < Inf))
      m_gamma = m;
      w_gamma = w/m;
      rnd = gamrnd (m_gamma, w_gamma, sz);
      rnd = sqrt (rnd);
    else
      rnd = NaN (sz, cls);
    endif
  else
    rnd = NaN (sz, cls);
    k = (0 < m) & (m < Inf) & (0 < w) & (w < Inf);
    m_gamma = m;
    w_gamma = w./m;
    rnd(k) = gamrnd (m_gamma(k), w_gamma(k));
    rnd(k) = sqrt (rnd(k));
  endif

endfunction
function [v, idx] = nanmax (X, Y, DIM)
  ## Maximum that ignores NaN values.
  ##
  ## nanmax (X), nanmax (X, [], DIM) reduce X like max does, after
  ## replacing NaN by -Inf; slices that consist only of NaN give NaN.
  ## nanmax (X, Y) compares X and Y after the same replacement; positions
  ## where both are NaN give NaN.

  if (nargin < 1 || nargin > 3)
    print_usage;
  endif

  if (nargin >= 2 && ! isempty (Y))
    ## Comparison of X against Y.
    x_is_nan = isnan (X);
    y_is_nan = isnan (Y);
    X(x_is_nan) = -Inf;
    Y(y_is_nan) = -Inf;
    if (nargin == 3)
      [v, idx] = max (X, Y, DIM);
    else
      [v, idx] = max (X, Y);
    endif
    v(x_is_nan & y_is_nan) = NaN;
  elseif (nargin == 3)
    ## Reduction of X along the requested dimension.
    nan_mask = isnan (X);
    X(nan_mask) = -Inf;
    [v, idx] = max (X, [], DIM);
    v(all (nan_mask, DIM)) = NaN;
  else
    ## Reduction of X along the default dimension.
    nan_mask = isnan (X);
    X(nan_mask) = -Inf;
    [v, idx] = max (X);
    v(all (nan_mask)) = NaN;
  endif

endfunction
function v = nanmean (X, varargin)
  ## Mean that ignores NaN values.
  ##
  ## Any extra arguments (for example the dimension) are forwarded to sum.
  ## A slice without any non-NaN value yields NaN.

  if (nargin < 1)
    print_usage;
  endif

  missing = isnan (X);

  ## Number of usable values per slice; zero becomes NaN so that the
  ## division below returns NaN instead of 0/0 warnings or Inf.
  count = sum (! missing, varargin{:});
  count(count == 0) = NaN;

  ## Missing values contribute nothing to the total.
  X(missing) = 0;
  v = sum (X, varargin{:}) ./ count;

endfunction
function v = nanmedian (X, varargin)
  ## Median that ignores NaN values.
  ##
  ## nanmedian (X) works along the first non-singleton dimension,
  ## nanmedian (X, DIM) along dimension DIM.  A slice that contains only
  ## NaN yields NaN.  A scalar X is returned unchanged (the median of one
  ## value is that value); an empty X is rejected with an error.

  if (nargin < 1 || nargin > 2)
    print_usage;
  endif

  ## The median of a single value is the value itself.  Previously this
  ## case fell through to the "invalid matrix argument" error.
  if (isscalar (X))
    v = X;
    return;
  endif

  if (nargin < 2)
    dim = min (find (size (X) > 1));
    if (isempty (dim))
      dim = 1;
    endif
  else
    dim = varargin{1};
  endif

  sz = size (X);
  if (prod (sz) > 1)
    ## Find lengths of datasets after excluding NaNs; valid datasets
    ## are those that are not empty after you remove all the NaNs
    n = sz(dim) - sum (isnan (X), varargin{:});

    ## When n is equal to zero, force it to one, so that median
    ## picks up a NaN value below
    n(n == 0) = 1;

    ## Sort the datasets, with the NaN going to the end of the data
    X = sort (X, varargin{:});

    ## Determine the offset for each column in single index mode
    colidx = reshape ((0:(prod (sz) / sz(dim) - 1)), size (n));
    colidx = floor (colidx / prod (sz(1:dim-1))) * prod (sz(1:dim)) + ...
             mod (colidx, prod (sz(1:dim-1)));
    stride = prod (sz(1:dim-1));

    ## Average the two central values of the sorted list to compute
    ## the median, but only do so for valid rows.  If the dataset
    ## is odd length, the single central value will be used twice.
    ## E.g.,
    ##   for n==5, ceil(2.5+0.5) is 3 and floor(2.5+0.5) is also 3
    ##   for n==6, ceil(3.0+0.5) is 4 and floor(3.0+0.5) is 3
    ## correction made for stride of data "stride*ceil(2.5-0.5)+1"
    v = (X(colidx + stride*ceil (n./2-0.5) + 1) + ...
         X(colidx + stride*floor (n./2-0.5) + 1)) ./ 2;
  else
    error ("nanmedian: invalid matrix argument");
  endif

endfunction
function [v, idx] = nanmin (X, Y, DIM)
  ## Minimum that ignores NaN values.
  ##
  ## nanmin (X), nanmin (X, [], DIM) reduce X like min does, after
  ## replacing NaN by Inf; slices that consist only of NaN give NaN.
  ## nanmin (X, Y) compares X and Y after the same replacement; positions
  ## where both are NaN give NaN.

  if (nargin < 1 || nargin > 3)
    print_usage;
  endif

  if (nargin >= 2 && ! isempty (Y))
    ## Comparison of X against Y.
    x_is_nan = isnan (X);
    y_is_nan = isnan (Y);
    X(x_is_nan) = Inf;
    Y(y_is_nan) = Inf;
    if (nargin == 3)
      [v, idx] = min (X, Y, DIM);
    else
      [v, idx] = min (X, Y);
    endif
    v(x_is_nan & y_is_nan) = NaN;
  elseif (nargin == 3)
    ## Reduction of X along the requested dimension.
    nan_mask = isnan (X);
    X(nan_mask) = Inf;
    [v, idx] = min (X, [], DIM);
    v(all (nan_mask, DIM)) = NaN;
  else
    ## Reduction of X along the default dimension.
    nan_mask = isnan (X);
    X(nan_mask) = Inf;
    [v, idx] = min (X);
    v(all (nan_mask)) = NaN;
  endif

endfunction
function v = nanstd (X, opt, varargin)
  ## Standard deviation that ignores NaN values.
  ##
  ## OPT selects the normalisation: 0 (default) divides by N-1, 1 divides
  ## by N, where N is the number of non-NaN values in the slice.  The
  ## optional third argument is the dimension to operate along.
  ##
  ## A slice with only NaN values yields NaN; a slice with a single
  ## non-NaN value yields 0.

  if (nargin < 1)
    print_usage;
  endif

  if (nargin < 3)
    dim = min (find (size (X) > 1));
    if (isempty (dim))
      dim = 1;
    endif
  else
    dim = varargin{1};
  endif

  if ((nargin < 2) || isempty (opt))
    opt = 0;
  endif

  ## determine the number of non-missing points in each data set
  missing = isnan (X);
  n = sum (! missing, varargin{:});

  ## replace missing data with zero and compute the mean
  X(missing) = 0;
  meanX = sum (X, varargin{:}) ./ n;

  ## subtract the mean from the data
  sz = ones (1, length (size (X)));
  sz(dim) = size (X, dim);
  dev = X - repmat (meanX, sz);

  ## Missing points must not contribute to the sum of squares.  Zeroing
  ## their deviation (instead of adding meanX^2 per missing point and
  ## subtracting it again afterwards) avoids catastrophic cancellation
  ## when the mean is large compared to the spread of the data.
  dev(missing) = 0;
  v = sumsq (dev, varargin{:});

  if (opt == 0)
    ## use max(n-1,1) in the denominator so that the std for a single
    ## point is 0
    v = sqrt (v ./ max (n - 1, 1));
  elseif (opt == 1)
    v = sqrt (v ./ n);
  else
    error ("nanstd: unrecognized normalization type");
  endif

  ## slices without any valid point have no defined deviation
  v(n == 0) = NaN;

  ## make sure that we return a real number
  v = real (v);

endfunction
License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Built-in Function} {} nansum (@var{x}) ## @deftypefnx {Built-in Function} {} nansum (@var{x}, @var{dim}) ## @deftypefnx {Built-in Function} {} nansum (@dots{}, @qcode{"native"}) ## @deftypefnx {Built-in Function} {} nansum (@dots{}, @qcode{"double"}) ## @deftypefnx {Built-in Function} {} nansum (@dots{}, @qcode{"extra"}) ## Compute the sum while ignoring NaN values. ## ## @code{nansum} is identical to the @code{sum} function except that NaN ## values are treated as 0 and so ignored. If all values are NaN, the sum is ## returned as 0. ## ## See help text of @code{sum} for details on the options. 
function v = nansum (X, varargin)
  ## Sum that treats NaN values as zero.
  ##
  ## All arguments after X (dimension, "native", "double", "extra") are
  ## forwarded unchanged to sum.

  if (nargin < 1)
    print_usage ();
  endif

  nan_mask = isnan (X);
  X(nan_mask) = 0;
  v = sum (X, varargin{:});

endfunction
function y = nanvar(x,w,dim)
  ## Variance that ignores NaN values, computed as the square of nanstd.
  ##
  ## W selects the normalisation passed on to nanstd (0 by default) and
  ## DIM the dimension; DIM defaults to the first non-singleton dimension
  ## of X, or 1 if there is none.

  if (nargin < 1)
    print_usage ();
  endif

  if ((nargin < 2) || isempty (w))
    w = 0;
  endif

  if (nargin < 3)
    dim = min (find (size (x) > 1));
    if (isempty (dim))
      dim = 1;
    endif
  endif

  y = nanstd (x, w, dim) .^ 2;

endfunction
function [m, v] = nbinstat (n, p)
  ## Mean M and variance V of the negative binomial distribution with
  ## parameters N and P.
  ##
  ## N and P must be of common size or scalar.  Elements for which
  ## 0 < N < Inf and 0 < P < 1 do not both hold are returned as NaN.

  # Check arguments
  if (nargin != 2)
    print_usage ();
  endif

  if (! isempty (n) && ! ismatrix (n))
    error ("nbinstat: n must be a numeric matrix");
  endif
  if (! isempty (p) && ! ismatrix (p))
    error ("nbinstat: p must be a numeric matrix");
  endif

  if (! isscalar (n) || ! isscalar (p))
    [retval, n, p] = common_size (n, p);
    if (retval > 0)
      error ("nbinstat: n and p must be of common size or scalar");
    endif
  endif

  # Calculate moments
  q = 1 - p;
  m = n .* q ./ p;
  v = n .* q ./ (p .^ 2);

  # Continue argument check: anything outside the valid parameter range
  # (including NaN parameters) is reported as NaN
  in_range = (n > 0) & (n < Inf) & (p > 0) & (p < 1);
  m(! in_range) = NaN;
  v(! in_range) = NaN;

endfunction
expected_m = [1, 2, 3, 4]; %! expected_v = [2, 4, 6, 8]; %! assert (m, expected_m, 0.001); %! assert (v, expected_v, 0.001); statistics-1.3.0/inst/normalise_distribution.m0000755000000000000000000002142612776476211020003 0ustar 00000000000000## Copyright (C) 2011 Alexander Klein ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn{Function File} {@var{NORMALISED} =} normalise_distribution (@var{DATA}) ## @deftypefnx{Function File} {@var{NORMALISED} =} normalise_distribution (@var{DATA}, @var{DISTRIBUTION}) ## @deftypefnx{Function File} {@var{NORMALISED} =} normalise_distribution (@var{DATA}, @var{DISTRIBUTION}, @var{DIMENSION}) ## ## Transform a set of data so as to be N(0,1) distributed according to an idea ## by van Albada and Robinson. ## This is achieved by first passing it through its own cumulative distribution ## function (CDF) in order to get a uniform distribution, and then mapping ## the uniform to a normal distribution. ## The data must be passed as a vector or matrix in @var{DATA}. ## If the CDF is unknown, then [] can be passed in @var{DISTRIBUTION}, and in ## this case the empirical CDF will be used. 
function [ normalised ] = normalise_distribution ( data, distribution, dimension )
  ## Transform DATA so that it is N(0,1) distributed by passing it through
  ## a CDF and then through norminv.
  ##
  ## DISTRIBUTION may be empty or omitted (empirical CDF), a function name,
  ## a function handle, or a cell array of names or handles with one entry
  ## per row/column to normalise.  DIMENSION is 1 or 2; by default a row
  ## vector is processed along dimension 2 and everything else along 1.

  if ( nargin < 1 || nargin > 3 )
    print_usage;
  elseif ( !ismatrix ( data ) || length ( size ( data ) ) > 2 )
    error ( "First argument must be a vector or matrix" );
  endif

  if ( nargin >= 2 )
    if ( !isempty ( distribution ) )
      ## Wrap a single handle in a cell array.
      if ( is_function_handle ( distribution ) )
        distribution = { distribution };

      ## Do we have a string argument instead?
      elseif ( ischar ( distribution ) )
        ## Is it a single string?
        if ( rows ( distribution ) == 1 )
          distribution = { str2func( distribution ) };
        else
          error ( ["Second argument cannot contain more than one string" ...
                   " unless in a cell array"] );
        endif

      ## Do we have a cell array of distributions instead?
      elseif ( iscell ( distribution ) )
        ## Does it consist of strings only?
        if ( all ( cellfun ( @ischar, distribution ) ) )
          distribution = cellfun ( @str2func, distribution, "UniformOutput", false );
        endif

        ## Does it eventually consist of function handles only?
        ## The (:) makes the test cover every element of a 2-D cell array.
        if ( !all ( cellfun ( @is_function_handle, distribution )(:) ) )
          error ( ["Second argument must contain either" ...
                   " a single function name or handle or " ...
                   " a cell array of either all function names or handles!"] );
        endif
      else
        ## The template needs a conversion, otherwise the type name is
        ## silently dropped from the message.
        error ( "Illegal second argument: %s", typeinfo ( distribution ) );
      endif
    endif
  else
    distribution = [];
  endif

  if ( nargin == 3 )
    if ( !isscalar ( dimension ) || ( dimension != 1 && dimension != 2 ) )
      error ( "Third argument must be either 1 or 2" );
    endif
  else
    if ( isvector ( data ) && rows ( data ) == 1 )
      dimension = 2;
    else
      dimension = 1;
    endif
  endif

  ## Always work column-wise; transpose back at the end if necessary.
  trp = ( dimension == 2 );
  if ( trp )
    data = data';
  endif

  r = rows ( data );
  c = columns ( data );
  normalised = NA ( r, c );

  ## Do we know the distribution of the sample?
  if ( isempty ( distribution ) )
    precomputed_normalisation = [];

    for k = 1 : columns ( data )
      ## Note that this line is in accordance with equation (16) in the
      ## original text.  The author's original program, however, produces
      ## different values in the presence of ties, namely those you'd
      ## get replacing "last" by "first".
      [ uniq, indices ] = unique ( sort ( data ( :, k ) ), "last" );

      ## Does the sample have ties?
      if ( rows ( uniq ) != r )
        ## Transform to uniform, then normal distribution.
        uniform = ( indices - 1/2 ) / r;
        normal = norminv ( uniform );
      else
        ## Without ties everything is pretty much straightforward as
        ## stated in the text.
        if ( isempty ( precomputed_normalisation ) )
          precomputed_normalisation = norminv ( 1 / (2*r) : 1/r : 1 - 1 / (2*r) );
        endif
        normal = precomputed_normalisation;
      endif

      ## Find the original indices in the unsorted sample and put the
      ## normalised values in the places where they belong.
      [ ignore, ignore, target_indices ] = unique ( data (:, k ) );
      normalised ( :, k ) = normal ( target_indices );
    endfor
  else
    ## With known distributions, everything boils down to a few lines of code

    ## The same distribution for all data?
    if ( all ( size ( distribution ) == 1 ) )
      normalised = norminv ( distribution {1,1} ( data ) );
    elseif ( length ( vec ( distribution ) ) == c )
      for k = 1 : c
        ## Evaluate each CDF only on its own column.
        normalised ( :, k ) = norminv ( distribution { k } ( data ( :, k ) ) );
      endfor
    else
      error ( "Number of distributions does not match data size! ")
    endif
  endif

  if ( trp )
    normalised = normalised';
  endif

endfunction
N = normalise_distribution ( A, "unifcdf" ); %! assert ( mean ( vec ( N ) ), 0, 0.1 ) %! assert ( std ( vec ( N ) ), 1, 0.1 ) %!xtest %! A = [rand(1000,1), randn( 1000, 1)]; %! N = normalise_distribution ( A, { "unifcdf", "normcdf" } ); %! assert ( mean ( N ), [ 0, 0 ], 0.1 ) %! assert ( std ( N ), [ 1, 1 ], 0.1 ) %!xtest %! A = [rand(1000,1), randn( 1000, 1), exprnd( 1, 1000, 1 )]'; %! N = normalise_distribution ( A, { @unifcdf; @normcdf; @( x )( expcdf ( x, 1 ) ) }, 2 ); %! assert ( mean ( N, 2 ), [ 0, 0, 0 ]', 0.1 ) %! assert ( std ( N, [], 2 ), [ 1, 1, 1 ]', 0.1 ) %!xtest %! A = exprnd ( 1, 1000, 9 ); A ( 300 : 500, 4:6 ) = 17; %! N = normalise_distribution ( A ); %! assert ( mean ( N ), [ 0 0 0 0.38 0.38 0.38 0 0 0 ], 0.1 ); %! assert ( var ( N ), [ 1 1 1 2.59 2.59 2.59 1 1 1 ], 0.1 ); %!test %! fail ("normalise_distribution( zeros ( 3, 4 ), { @unifcdf; @normcdf; @( x )( expcdf ( x, 1 ) ) } )", ... %! "Number of distributions does not match data size!"); statistics-1.3.0/inst/normplot.m0000755000000000000000000000435112776476211015063 0ustar 00000000000000## Author: Paul Kienzle ## This program is granted to the public domain. ## -*- texinfo -*- ## @deftypefn {Function File} normplot (@var{X}) ## Produce normal probability plot for each column of @var{X}. ## ## The line joining the 1st and 3rd quartile is drawn on the ## graph. If the underlying distribution is normal, the ## points will cluster around this line. ## ## Note that this function sets the title, xlabel, ylabel, ## axis, grid, tics and hold properties of the graph. These ## need to be cleared before subsequent graphs using 'clf'.
function normplot(X)
  # Draw a normal probability plot for each column of X, together with
  # the straight line through the first and third quartile points.
  # A row vector is treated as a single column.  At least two
  # observations are required.
  if nargin!=1, print_usage; end
  if (rows(X) == 1), X=X(:); end

  # Transform data
  n = rows(X);
  if n<2, error("normplot requires a vector"); end
  # Plotting positions i/(n+1) mapped to standard normal quantiles.
  q = norminv([1:n]'/(n+1));
  # Sort every column independently.
  Y = sort(X);

  # Find the line joining the first to the third quartile for each column
  q1 = ceil(n/4);
  q3 = n-q1+1;
  # Slope (m) and, in the second row of p, the intercept of q as a linear
  # function of the data; p(:,i) is in polyval coefficient order.
  m = (q(q3)-q(q1))./(Y(q3,:)-Y(q1,:));
  p = [ m; q(q1)-m.*Y(q1,:) ];

  # Plot the lines one at a time.  Plot the lines before overlaying the
  # normals so that the default label is 'line n'.
  # The format string carries the point style and the legend key
  # (an empty key for a single column).
  if columns(Y)==1,
    leg = "+;;";
  else
    leg = "%d+;Column %d;";
  endif

  for i=1:columns(Y)
    plot(Y(:,i),q,sprintf(leg,i,i)); hold on;

    # estimate the mean and standard deviation by linear regression
    # [v,dv] = wpolyfit(q,Y(:,i),1)
  end

  # Overlay the estimated normal lines.
  for i=1:columns(Y)
    # Use the end points and one point guaranteed to be in the view since
    # gnuplot skips any lines whose points are all outside the view.
    pts = [Y(1,i);Y(q1,i);Y(end,i)];
    plot(pts, polyval(p(:,i),pts), [num2str(i),";;"]);
  end
  hold off;

  # plot labels
  title "Normal Probability Plot"
  ylabel "% Probability"
  xlabel "Data"

  # plot grid
  # Tick positions are given in percent and placed at the matching
  # normal quantiles, so the y axis reads as a probability scale.
  t = [0.00001;0.0001;0.001;0.01;0.1;0.3;1;2;5;10;25;50;
       75;90;95;98;99;99.7;99.9;99.99;99.999;99.9999;99.99999];
  set(gca, "ytick", norminv(t/100), "yticklabel", num2str(t));
  grid on

  # Set view range with a bit of space around data
  # The quantile axis always spans at least the 5% to 95% range.
  miny = min(Y(:)); minq = min(q(1),norminv(0.05));
  maxy = max(Y(:)); maxq = max(q(end),norminv(0.95));
  yspace = (maxy-miny)*0.05;
  qspace = (q(end)-q(1))*0.05;
  axis ([miny-yspace, maxy+yspace, minq-qspace, maxq+qspace]);
end
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see .

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{mn}, @var{v}] =} normstat (@var{m}, @var{s})
## Compute mean and variance of the normal distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{m} is the mean of the normal distribution
##
## @item
## @var{s} is the standard deviation of the normal distribution.
## @var{s} must be positive
## @end itemize
## @var{m} and @var{s} must be of common size or one of them must be
## scalar
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{mn} is the mean of the normal distribution
##
## @item
## @var{v} is the variance of the normal distribution
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## m = 1:6;
## s = 0:0.2:1;
## [mn, v] = normstat (m, s)
## @end group
##
## @group
## [mn, v] = normstat (0, s)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Moments of the normal distribution

function [mn, v] = normstat (m, s)

  ## Exactly two inputs are accepted.
  if (nargin != 2)
    print_usage ();
  endif

  ## Empty inputs are let through; anything else must pass ismatrix.
  if (! (isempty (m) || ismatrix (m)))
    error ("normstat: m must be a numeric matrix");
  endif
  if (! (isempty (s) || ismatrix (s)))
    error ("normstat: s must be a numeric matrix");
  endif

  ## Unless both inputs are scalar, expand them to a common size.
  if (! (isscalar (m) && isscalar (s)))
    [retval, m, s] = common_size (m, s);
    if (retval > 0)
      error ("normstat: m and s must be of common size or scalar");
    endif
  endif

  ## Moments: the mean is m itself, the variance is s squared.
  mn = m;
  v = s .* s;

  ## Entries whose s is not strictly positive and finite (this includes NaN)
  ## yield NaN for both moments.
  invalid = ! (s > 0) | ! (s < Inf);
  mn(invalid) = NaN;
  v(invalid) = NaN;

endfunction

%!test
%! m = 1:6;
%! s = 0.2:0.2:1.2;
%! [mn, v] = normstat (m, s);
%! expected_v = [0.0400, 0.1600, 0.3600, 0.6400, 1.0000, 1.4400];
%! assert (mn, m);
%! assert (v, expected_v, 0.001);

%!test
%! s = 0.2:0.2:1.2;
%! [mn, v] = normstat (0, s);
%! expected_mn = [0, 0, 0, 0, 0, 0];
%! expected_v = [0.0400, 0.1600, 0.3600, 0.6400, 1.0000, 1.4400];
%! assert (mn, expected_mn, 0.001);
%! assert (v, expected_v, 0.001);
statistics-1.3.0/inst/pcacov.m0000755000000000000000000000454612776476211014472 0ustar 00000000000000## Copyright (C) 2013 Fernando Damian Nieuwveldt
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License
## as published by the Free Software Foundation; either version 3
## of the License, or (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see .
## -*- texinfo -*-
## @deftypefn {Function File} {[@var{COEFF}]} = pcacov(@var{X})
## @deftypefnx {Function File} {[@var{COEFF},@var{latent}]} = pcacov(@var{X})
## @deftypefnx {Function File} {[@var{COEFF},@var{latent},@var{explained}]} = pcacov(@var{X})
## @itemize @bullet
## @item
## pcacov performs principal component analysis on the nxn covariance matrix X
## @item
## @var{COEFF} : a nxn matrix with columns containing the principal component coefficients
## @item
## @var{latent} : a vector containing the principal component variances
## @item
## @var{explained} : a vector containing the percentage of the total variance explained by each principal component
##
## @end itemize
##
## @subheading References
##
## @enumerate
## @item
## Jolliffe, I. T., Principal Component Analysis, 2nd Edition, Springer, 2002
##
## @end enumerate
## @end deftypefn

## Author: Fernando Damian Nieuwveldt
## Description: Principal Components Analysis using a covariance matrix

function [COEFF, latent, explained] = pcacov(X)

  ## The left singular vectors of X are returned as the coefficients and the
  ## singular values as the component variances.
  [COEFF, sv, unused] = svd (X);

  ## latent is produced for every call except the single-output one.
  if (nargout != 1)
    latent = diag (sv);

    ## explained is produced for zero, three or more requested outputs.
    if (nargout != 2)
      explained = 100*latent./sum(latent);
    endif
  endif

endfunction

%!demo
%! X = [ 7 26 6 60;
%! 1 29 15 52;
%! 11 56 8 20;
%! 11 31 8 47;
%! 7 52 6 33;
%! 11 55 9 22;
%! 3 71 17 6;
%! 1 31 22 44;
%! 2 54 18 22;
%! 21 47 4 26;
%! 1 40 23 34;
%! 11 66 9 12;
%! 10 68 8 12
%! ];
%! covx = cov(X);
%! [COEFF,latent,explained] = pcacov(covx)
statistics-1.3.0/inst/pcares.m0000755000000000000000000000520712776476211014467 0ustar 00000000000000## Copyright (C) 2013 Fernando Damian Nieuwveldt
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License
## as published by the Free Software Foundation; either version 3
## of the License, or (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see .

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{residuals},@var{reconstructed}]}=pcares(@var{X}, @var{NDIM})
## @itemize @bullet
## @item
## @var{X} : N x P Matrix with N observations and P variables, the variables will be mean centered
## @item
## @var{ndim} : Is a scalar indicating the number of principal components to use and should be <= P
## @item
## @var{residuals} : X (mean centered) minus its projection onto the first @var{ndim} principal components
## @item
## @var{reconstructed} : X minus @var{residuals}
## @end itemize
##
## @subheading References
##
## @enumerate
## @item
## Jolliffe, I. T., Principal Component Analysis, 2nd Edition, Springer, 2002
##
## @end enumerate
## @end deftypefn

## Author: Fernando Damian Nieuwveldt
## Description: Residuals from Principal Components Analysis

function [residuals,reconstructed] = pcares(X,NDIM)

  if (nargin ~= 2)
    error('pcares takes two inputs: The data Matrix X and number of principal components NDIM')
  endif

  ## Report an unusable NDIM up front instead of failing later with an
  ## index-out-of-bound error on the coefficient matrix.
  if (! (isnumeric (NDIM) && isscalar (NDIM) && isreal (NDIM)))
    error ("pcares: NDIM must be a real numeric scalar");
  elseif (NDIM > columns (X))
    error ("pcares: NDIM must not exceed the number of variables (columns of X)");
  endif

  ## Mean center data.  The dimension is given explicitly so that a single
  ## observation (1 x P) is centered per variable rather than by the mean
  ## of the whole row.
  Xcentered = bsxfun (@minus, X, mean (X, 1));

  ## Apply svd to get the principal component coefficients
  [U, S, V] = svd (Xcentered);

  ## Use only the first ndim PCA components
  v = V(:,1:NDIM);

  ## Residuals: what remains after projecting onto the retained components
  residuals = Xcentered - Xcentered * (v*v');

  ## Reconstructed data using ndim PCA components, only when asked for
  if (nargout == 2)
    reconstructed = X - residuals;
  endif

endfunction

%!demo
%! X = [ 7 26 6 60;
%! 1 29 15 52;
%! 11 56 8 20;
%! 11 31 8 47;
%! 7 52 6 33;
%! 11 55 9 22;
%! 3 71 17 6;
%! 1 31 22 44;
%! 2 54 18 22;
%! 21 47 4 26;
%! 1 40 23 34;
%! 11 66 9 12;
%! 10 68 8 12
%! ];
%! # As we increase the number of principal components, the norm
%! # of the residuals matrix will decrease
%! r1 = pcares(X,1);
%! n1 = norm(r1)
%! r2 = pcares(X,2);
%! n2 = norm(r2)
%! r3 = pcares(X,3);
%! n3 = norm(r3)
%!
r4 = pcares(X,4); %! n4 = norm(r4) statistics-1.3.0/inst/pdf.m0000644000000000000000000001005412776476211013754 0ustar 00000000000000## Copyright (C) 2016 Andreas Stahel ## strongly based on cdf.m by 2013 Pantxo Diribarne ## ## This program is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program. If not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{retval} =} pdf (@var{name}, @var{X}, @dots{}) ## Return probability density function of @var{name} function for value ## @var{x}. ## This is a wrapper around various @var{name}pdf and @var{name}_pdf ## functions. See the individual functions help to learn the signification of ## the arguments after @var{x}. 
Supported functions and corresponding number of
## additional arguments are:
##
## @multitable @columnfractions 0.02 0.3 0.45 0.2
## @headitem @tab function @tab alternative @tab args
## @item @tab "beta" @tab "beta" @tab 2
## @item @tab "bino" @tab "binomial" @tab 2
## @item @tab "cauchy" @tab @tab 2
## @item @tab "chi2" @tab "chisquare" @tab 1
## @item @tab "discrete" @tab @tab 2
## @item @tab "exp" @tab "exponential" @tab 1
## @item @tab "f" @tab @tab 2
## @item @tab "gam" @tab "gamma" @tab 2
## @item @tab "geo" @tab "geometric" @tab 1
## @item @tab "gev" @tab "generalized extreme value" @tab 3
## @item @tab "hyge" @tab "hypergeometric" @tab 3
## @item @tab "kolmogorov_smirnov" @tab @tab 1
## @item @tab "laplace" @tab @tab 2
## @item @tab "logistic" @tab @tab 0
## @item @tab "logn" @tab "lognormal" @tab 2
## @item @tab "norm" @tab "normal" @tab 2
## @item @tab "poiss" @tab "poisson" @tab 1
## @item @tab "rayl" @tab "rayleigh" @tab 1
## @item @tab "t" @tab @tab 1
## @item @tab "unif" @tab "uniform" @tab 2
## @item @tab "wbl" @tab "weibull" @tab 2
## @end multitable
##
## @seealso{betapdf, binopdf, cauchy_pdf, chi2pdf, discrete_pdf,
## exppdf, fpdf, gampdf, geopdf, gevpdf, hygepdf, laplace_pdf,
## logistic_pdf, lognpdf, normpdf, poisspdf, raylpdf, tpdf,
## unifpdf, wblpdf}
## @end deftypefn

function [retval] = pdf (varargin)

  ## Dispatch table, three cells per distribution:
  ##   {accepted names}, function handle, number of additional arguments.
  ## NOTE(review): the help text lists "laplace" with 2 additional arguments
  ## while this table requires 1, and it lists "kolmogorov_smirnov", which
  ## has no entry here -- confirm against the wrapped functions.
  persistent allpdf = {{"beta", "beta"}, @betapdf, 2, ...
            {"bino", "binomial"}, @binopdf, 2, ...
            {"cauchy"}, @cauchy_pdf, 2, ...
            {"chi2", "chisquare"}, @chi2pdf, 1, ...
            {"discrete"}, @discrete_pdf, 2, ...
            {"exp", "exponential"}, @exppdf, 1, ...
            {"f"}, @fpdf, 2, ...
            {"gam", "gamma"}, @gampdf, 2, ...
            {"geo", "geometric"}, @geopdf, 1, ...
            {"gev", "generalized extreme value"}, @gevpdf, 3, ...
            {"hyge", "hypergeometric"}, @hygepdf, 3, ...
            {"laplace"}, @laplace_pdf, 1, ...
            {"logistic"}, @logistic_pdf, 0, ... # ML has 2 args here
            {"logn", "lognormal"}, @lognpdf, 2, ...
            {"norm", "normal"}, @normpdf, 2, ...
            {"poiss", "poisson"}, @poisspdf, 1, ...
            {"rayl", "rayleigh"}, @raylpdf, 1, ...
            {"t"}, @tpdf, 1, ...
            {"unif", "uniform"}, @unifpdf, 2, ...
            {"wbl", "weibull"}, @wblpdf, 2};

  ## A distribution name and the evaluation points are mandatory.
  if (numel (varargin) < 2 || ! ischar (varargin{1}))
    print_usage ();
  endif

  name = varargin{1};
  x = varargin{2};
  params = varargin(3:end);

  ## View the flat table as 3 rows: names / handles / argument counts.
  tbl = reshape (allpdf, 3, []);

  ## Case-insensitive lookup of the requested name among all aliases.
  hit = cellfun (@(aliases) any (strcmpi (name, aliases)), tbl(1,:));

  if (! any (hit))
    error ("pdf: %s not implemented", name);
  endif

  wanted = tbl{3,hit};
  if (numel (params) != wanted)
    error ("pdf: %s requires %d arguments", name, wanted)
  endif

  retval = feval (tbl{2,hit}, x, params{:});

endfunction

%!test
%! assert(pdf ('norm', 1, 0, 1), normpdf (1, 0, 1))statistics-1.3.0/inst/pdist.m0000755000000000000000000001644412776476211014342 0ustar 00000000000000## Copyright (C) 2008 Francesco Potortì
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see .

## -*- texinfo -*-
## @deftypefn {Function File} {@var{y} =} pdist (@var{x})
## @deftypefnx {Function File} {@var{y} =} pdist (@var{x}, @var{metric})
## @deftypefnx {Function File} {@var{y} =} pdist (@var{x}, @var{metric}, @var{metricarg}, @dots{})
##
## Return the distance between any two rows in @var{x}.
##
## @var{x} is the @var{n}x@var{d} matrix representing @var{n} row
## vectors of size @var{d}.
## ## The output is a dissimilarity matrix formatted as a row vector ## @var{y}, @math{(n-1)*n/2} long, where the distances are in ## the order [(1, 2) (1, 3) @dots{} (2, 3) @dots{} (n-1, n)]. You can ## use the @code{squareform} function to display the distances between ## the vectors arranged into an @var{n}x@var{n} matrix. ## ## @code{metric} is an optional argument specifying how the distance is ## computed. It can be any of the following ones, defaulting to ## "euclidean", or a user defined function that takes two arguments ## @var{x} and @var{y} plus any number of optional arguments, ## where @var{x} is a row vector and @var{y} is a matrix having the ## same number of columns as @var{x}. @code{metric} returns a column ## vector where row @var{i} is the distance between @var{x} and row ## @var{i} of @var{y}. Any additional arguments after the @code{metric} ## are passed as metric (@var{x}, @var{y}, @var{metricarg1}, ## @var{metricarg2} @dots{}). ## ## Predefined distance functions are: ## ## @table @samp ## @item "euclidean" ## Euclidean distance (default). ## ## @item "seuclidean" ## Standardized Euclidean distance. Each coordinate in the sum of ## squares is inverse weighted by the sample variance of that ## coordinate. ## ## @item "mahalanobis" ## Mahalanobis distance: see the function mahalanobis. ## ## @item "cityblock" ## City Block metric, aka Manhattan distance. ## ## @item "minkowski" ## Minkowski metric. Accepts a numeric parameter @var{p}: for @var{p}=1 ## this is the same as the cityblock metric, with @var{p}=2 (default) it ## is equal to the euclidean metric. ## ## @item "cosine" ## One minus the cosine of the included angle between rows, seen as ## vectors. ## ## @item "correlation" ## One minus the sample correlation between points (treated as ## sequences of values). ## ## @item "spearman" ## One minus the sample Spearman's rank correlation between ## observations, treated as sequences of values. 
##
## @item "hamming"
## Hamming distance: the fraction of coordinates that differ.
##
## @item "jaccard"
## One minus the Jaccard coefficient, the fraction of nonzero
## coordinates that differ.
##
## @item "chebychev"
## Chebychev distance: the maximum coordinate difference.
## @end table
## @seealso{linkage, mahalanobis, squareform, pdist2}
## @end deftypefn

## Author: Francesco Potortì

function y = pdist (x, metric, varargin)

  if (nargin < 1)
    print_usage ();
  elseif ((nargin > 1)
          && ! ischar (metric)
          && ! isa (metric, "function_handle"))
    ## The two parts are joined with an explicit continuation so they can
    ## never be parsed as two matrix rows of unequal length.
    error (["pdist: the distance function must be either a string or a ", ...
            "function handle."]);
  endif

  if (nargin < 2)
    metric = "euclidean";
  endif

  if (! ismatrix (x) || isempty (x))
    error ("pdist: x must be a nonempty matrix");
  elseif (length (size (x)) > 2)
    error ("pdist: x must be 1 or 2 dimensional");
  endif

  ## A single row has no pairs: the result is empty.
  y = [];
  if (rows(x) == 1)
    return;
  endif

  if (ischar (metric))
    ## All row pairs in the order (1,2) (1,3) ... (n-1,n).  The data is
    ## transposed so that every column of X is one observation and pair
    ## differences become column-wise operations.
    order = nchoosek(1:rows(x),2);
    Xi = order(:,1);
    Yi = order(:,2);
    X = x';
    metric = lower (metric);
    switch (metric)
      case "euclidean"
        d = X(:,Xi) - X(:,Yi);
        y = norm (d, "cols");

      case "seuclidean"
        d = X(:,Xi) - X(:,Yi);
        weights = inv (diag (var (x, 0, 1)));
        y = sqrt (sum ((weights * d) .* d, 1));

      case "mahalanobis"
        d = X(:,Xi) - X(:,Yi);
        weights = inv (cov (x));
        y = sqrt (sum ((weights * d) .* d, 1));

      case "cityblock"
        ## Column-wise 1-norm, written directly so that no parsing of the
        ## interpreter version string is needed.
        d = X(:,Xi) - X(:,Yi);
        y = sum (abs (d), 1);

      case "minkowski"
        d = X(:,Xi) - X(:,Yi);
        p = 2;                  # default
        if (nargin > 2)
          p = varargin{1};      # explicitly assigned
        endif;
        y = norm (d, p, "cols");

      case "cosine"
        prod = X(:,Xi) .* X(:,Yi);
        weights = sumsq (X(:,Xi), 1) .* sumsq (X(:,Yi), 1);
        y = 1 - sum (prod, 1) ./ sqrt (weights);

      case "correlation"
        if (rows(X) == 1)
          error ("pdist: correlation distance between scalars not defined")
        endif
        cor = corr (X);
        y = 1 - cor (sub2ind (size (cor), Xi, Yi))';

      case "spearman"
        if (rows(X) == 1)
          error ("pdist: spearman distance between scalars not defined")
        endif
        cor = spearman (X);
        y = 1 - cor (sub2ind (size (cor), Xi, Yi))';

      case "hamming"
        d = logical (X(:,Xi) - X(:,Yi));
        y = sum (d, 1) / rows (X);

      case "jaccard"
        d = logical (X(:,Xi) - X(:,Yi));
        weights = X(:,Xi) | X(:,Yi);
        y = sum (d & weights, 1) ./ sum (weights, 1);

      case "chebychev"
        d = X(:,Xi) - X(:,Yi);
        y = norm (d, Inf, "cols");

    endswitch
  endif

  if (isempty (y))
    ## Metric is a function handle or the name of an external function.
    ## The metric returns the distances from one row to every row of x, so
    ## it is called once per reference row and the entries for the later
    ## rows are copied out (previously it was re-evaluated for every pair).
    l = rows (x);
    y = zeros (1, nchoosek (l, 2));
    idx = 1;
    for ii = 1:l-1
      dists = feval (metric, x(ii,:), x, varargin{:});
      cnt = l - ii;
      y(idx:idx+cnt-1) = dists(ii+1:l);
      idx += cnt;
    endfor
  endif

endfunction

%!shared xy, t, eucl
%! xy = [0 1; 0 2; 7 6; 5 6];
%! t = 1e-3;
%! eucl = @(v,m) sqrt(sumsq(repmat(v,rows(m),1)-m,2));
%!assert(pdist(xy), [1.000 8.602 7.071 8.062 6.403 2.000],t);
%!assert(pdist(xy,eucl), [1.000 8.602 7.071 8.062 6.403 2.000],t);
%!assert(pdist(xy,"euclidean"), [1.000 8.602 7.071 8.062 6.403 2.000],t);
%!assert(pdist(xy,"seuclidean"), [0.380 2.735 2.363 2.486 2.070 0.561],t);
%!assert(pdist(xy,"mahalanobis"),[1.384 1.967 2.446 2.384 1.535 2.045],t);
%!assert(pdist(xy,"cityblock"), [1.000 12.00 10.00 11.00 9.000 2.000],t);
%!assert(pdist(xy,"minkowski"), [1.000 8.602 7.071 8.062 6.403 2.000],t);
%!assert(pdist(xy,"minkowski",3),[1.000 7.763 6.299 7.410 5.738 2.000],t);
%!assert(pdist(xy,"cosine"), [0.000 0.349 0.231 0.349 0.231 0.013],t);
%!assert(pdist(xy,"correlation"),[0.000 2.000 0.000 2.000 0.000 2.000],t);
%!assert(pdist(xy,"spearman"), [0.000 2.000 0.000 2.000 0.000 2.000],t);
%!assert(pdist(xy,"hamming"), [0.500 1.000 1.000 1.000 1.000 0.500],t);
%!assert(pdist(xy,"jaccard"), [1.000 1.000 1.000 1.000 1.000 0.500],t);
%!assert(pdist(xy,"chebychev"), [1.000 7.000 5.000 7.000 5.000 2.000],t);
statistics-1.3.0/inst/pdist2.m0000755000000000000000000001262312776476211014417 0ustar 00000000000000## Copyright (C) 2014 Piotr Dollar
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU
General Public License as ## published by the Free Software Foundation; either version 3 of the ## License, or (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program; if not, see ## . ## -*- texinfo -*- ## @deftypefn {Function File} {} pdist2 (@var{x}, @var{y}) ## @deftypefnx {Function File} {} pdist2 (@var{x}, @var{y}, @var{metric}) ## Compute pairwise distance between two sets of vectors. ## ## Let @var{X} be an MxP matrix representing m points in P-dimensional space ## and @var{Y} be an NxP matrix representing another set of points in the same ## space. This function computes the M-by-N distance matrix @var{D} where ## @code{@var{D}(i,j)} is the distance between @code{@var{X}(i,:)} and ## @code{@var{Y}(j,:)}. ## ## The optional argument @var{metric} can be used to select different ## distances: ## ## @table @asis ## @item @qcode{"euclidean"} (default) ## ## @item @qcode{"sqeuclidean"} ## Compute the squared euclidean distance, i.e., the euclidean distance ## before computing square root. This is ideal when the interest is on the ## order of the euclidean distances rather than the actual distance value ## because it performs significantly faster while preserving the order. ## ## @item @qcode{"chisq"} ## The chi-squared distance between two vectors is defined as: ## @code{d(x, y) = sum ((xi-yi)^2 / (xi+yi)) / 2}. ## The chi-squared distance is useful when comparing histograms. ## ## @item @qcode{"cosine"} ## Distance is defined as one minus the cosine of the angle between two vectors. ## ## @item @qcode{"emd"} ## Earth Mover's Distance (EMD) between positive vectors (histograms). 
## Note for 1D, with all histograms having equal weight, there is a simple
## closed form for the calculation of the EMD. The EMD between histograms
## @var{x} and @var{y} is given by @code{sum (abs (cdf (x) - cdf (y)))},
## where @code{cdf} is the cumulative distribution function (computed
## simply by @code{cumsum}).
##
## @item @qcode{"L1"}
## The L1 distance between two vectors is defined as: @code{sum (abs (x-y))}
##
## @end table
##
## @seealso{pdist}
## @end deftypefn

## Taken from Piotr's Computer Vision Matlab Toolbox Version 2.52, with
## author permission to distribute under GPLv3

function D = pdist2 (X, Y, metric = "euclidean")

  ## Two or three arguments; both point sets must be 2-D matrices with the
  ## same number of columns.
  if (nargin < 2 || nargin > 3)
    print_usage ();
  endif
  if (columns (X) != columns (Y))
    error ("pdist2: X and Y must have equal number of columns");
  endif
  if (ndims (X) != 2 || ndims (Y) != 2)
    error ("pdist2: X and Y must be 2 dimensional matrices");
  endif

  ## Metric names are matched without regard to case.
  kind = tolower (metric);
  if (strcmp (kind, "sqeuclidean"))
    D = distEucSq (X, Y);
  elseif (strcmp (kind, "euclidean"))
    D = sqrt (distEucSq (X, Y));
  elseif (strcmp (kind, "l1"))
    D = distL1 (X, Y);
  elseif (strcmp (kind, "cosine"))
    D = distCosine (X, Y);
  elseif (strcmp (kind, "emd"))
    D = distEmd (X, Y);
  elseif (strcmp (kind, "chisq"))
    D = distChiSq (X, Y);
  else
    error ("pdist2: unknown distance METRIC %s", metric);
  endif

  ## Negative entries are clamped to zero.
  D = max (0, D);

endfunction

## TODO we could check the value of p and n first, and choose one
## or the other loop accordingly.

## L1 COMPUTATION WITH LOOP OVER p, FAST FOR SMALL p.
## function D = distL1( X, Y )
## m = size(X,1); n = size(Y,1); p = size(X,2);
## mOnes = ones(1,m); nOnes = ones(1,n); D = zeros(m,n);
## for i=1:p
## yi = Y(:,i); yi = yi( :, mOnes );
## xi = X(:,i); xi = xi( :, nOnes );
## D = D + abs( xi-yi' );
## end

## L1 distance: D(j,i) = sum (abs (X(j,:) - Y(i,:))), looping over rows of Y.
function D = distL1 (X, Y)
  m = rows (X);
  n = rows (Y);
  mOnes = ones (1, m);
  D = zeros (m, n);
  for i = 1:n
    yi = Y(i,:);
    yi = yi(mOnes,:);            # replicate row i of Y for every row of X
    D(:,i) = sum (abs (X-yi), 2);
  endfor
endfunction

## Cosine distance: one minus the inner product of the rows after each row
## has been scaled to unit Euclidean length.
function D = distCosine (X, Y)
  p = columns (X);
  X = X ./ repmat (sqrt (sumsq (X, 2)), [1 p]);
  Y = Y ./ repmat (sqrt (sumsq (Y, 2)), [1 p]);
  D = 1 - X*Y';
endfunction

## Earth mover's distance: L1 distance between the row-wise cumulative sums.
function D = distEmd (X, Y)
  Xcdf = cumsum (X,2);
  Ycdf = cumsum (Y,2);
  m = rows (X);
  n = rows (Y);
  mOnes = ones (1, m);
  D = zeros (m, n);
  for i=1:n
    ycdf = Ycdf(i,:);
    ycdfRep = ycdf(mOnes,:);
    D(:,i) = sum (abs (Xcdf - ycdfRep), 2);
  endfor
endfunction

## Chi-squared distance: sum ((x-y).^2 ./ (x+y)) / 2; eps is added to the
## denominator so that coordinates where x+y is zero do not divide by zero.
function D = distChiSq (X, Y)
  ## note: supposedly it's possible to implement this without a loop!
  m = rows (X);
  n = rows (Y);
  mOnes = ones (1, m);
  D = zeros (m, n);
  for i = 1:n
    yi = Y(i, :);
    yiRep = yi(mOnes, :);
    s = yiRep + X;
    d = yiRep - X;
    D(:,i) = sum (d.^2 ./ (s+eps), 2);
  endfor
  D = D/2;
endfunction

## Squared Euclidean distance through the expansion
## |x - y|^2 = |x|^2 + |y|^2 - 2*x*y'.
function dists = distEucSq (x, y)
  xx = sumsq (x, 2);
  yy = sumsq (y, 2)';
  dists = bsxfun (@plus, xx, yy) - 2 * x * (y');
endfunction

## euclidean distance as loop for testing purposes
%!function dist = euclidean_distance (x, y)
%!  m = rows (x);
%!  n = rows (y);
%!  dist = zeros (m, n);
%!  for i = 1:n
%!    d = x - repmat (y(i,:), [m 1]);
%!    dist(:,i) = sqrt (sumsq (d, 2));
%!  endfor
%!endfunction

%!test
%! x = [1 1 1; 2 2 2; 3 3 3];
%! y = [0 0 0; 1 2 3; 0 2 4; 4 7 1];
%! d = sqrt([ 3 5 11 45
%! 12 2 8 30
%! 27 5 11 21]);
%!
assert (pdist2 (x, y), d)
statistics-1.3.0/inst/plsregress.m0000755000000000000000000000701712776476211015404 0ustar 00000000000000## Copyright (C) 2012 Fernando Damian Nieuwveldt
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License
## as published by the Free Software Foundation; either version 3
## of the License, or (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see .

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{XLOADINGS},@var{YLOADINGS},@var{XSCORES},@var{YSCORES},@var{coefficients},@var{fitted}] =} ...
## plsregress(@var{X}, @var{Y}, @var{NCOMP})
## @itemize @bullet
## @item
## @var{X}: Matrix of observations
## @item
## @var{Y}: Is a vector or matrix of responses
## @item
## @var{NCOMP}: number of components used for modelling
## @item
## @var{X} and @var{Y} will be mean centered to improve accuracy
## @end itemize
##
## Any number of output arguments, from none up to all six, may be requested.
##
## @subheading References
##
## @enumerate
## @item
## SIMPLS: An alternative approach to partial least squares regression. Chemometrics and Intelligent Laboratory
## Systems (1993)
##
## @end enumerate
## @end deftypefn

## Author: Fernando Damian Nieuwveldt
## Description: Partial least squares regression using SIMPLS algorithm

function [XLOADINGS, YLOADINGS, XSCORES, YSCORES, coefficients, fitted] = plsregress (X, Y, NCOMP)

  ## All three inputs are required.  The number of requested outputs is not
  ## restricted, so callers may ask for only the leading ones.
  if (nargin != 3)
    print_usage ();
  endif

  if (! isnumeric (X) || ! isnumeric (Y))
    error ("plsregress:Data matrix X and reponse matrix Y must be real matrices");
  elseif (rows (X) != rows (Y))
    error ("plsregress:Number of observations for Data matrix X and Response Matrix Y must be equal");
  elseif (! isscalar (NCOMP))
    error ("plsregress: Third argument must be a scalar");
  endif

  nobs = rows (X);       # Number of observations
  npred = columns (X);   # Number of predictor variables
  nresp = columns (Y);   # Number of responses

  ## Mean centering data matrix and responses.  The dimension is explicit so
  ## that a single observation is centered per column.
  Xmeans = mean (X, 1);
  X = bsxfun (@minus, X, Xmeans);
  Ymeans = mean (Y, 1);
  Y = bsxfun (@minus, Y, Ymeans);

  S = X'*Y;

  R = zeros (npred, NCOMP);   # X block factor weights
  P = zeros (npred, NCOMP);   # X block factor loadings
  V = zeros (npred, NCOMP);   # orthonormal basis of the X loadings
  T = zeros (nobs, NCOMP);    # X block factor scores
  U = zeros (nobs, NCOMP);    # Y block factor scores
  Q = zeros (nresp, NCOMP);   # Y block factor loadings

  for a = 1:NCOMP
    [eigvec eigval] = eig (S'*S);          # Y factor weights
    ## Dominant eigenvector.  max returns the first maximum, so tied
    ## eigenvalues still select exactly one column.
    [maxval, domindex] = max (diag (eigval));
    q = eigvec(:,domindex);

    r = S*q;               # X block factor weights
    t = X*r;               # X block factor scores
    t = t - mean (t);

    nt = sqrt (t'*t);      # compute norm
    t = t/nt;
    r = r/nt;              # normalize

    p = X'*t;              # X block factor loadings
    q = Y'*t;              # Y block factor loadings
    u = Y*q;               # Y block factor scores
    v = p;

    ## Ensure orthogonality
    if (a > 1)
      v = v - V*(V'*p);
      u = u - T*(T'*u);
    endif

    v = v/sqrt(v'*v);      # normalize orthogonal loadings
    S = S - v*(v'*S);      # deflate S wrt loadings

    ## Store data
    R(:,a) = r;
    T(:,a) = t;
    P(:,a) = p;
    Q(:,a) = q;
    U(:,a) = u;
    V(:,a) = v;
  endfor

  ## Regression coefficients (for the mean-centered data)
  coefficients = R*Q';
  fitted = bsxfun (@plus, T*Q', Ymeans);   # Add mean

  ## Return
  XSCORES = T;
  XLOADINGS = P;
  YSCORES = U;
  YLOADINGS = Q;

endfunction
statistics-1.3.0/inst/poisstat.m0000755000000000000000000000452712776476211015064 0ustar 00000000000000## Copyright (C) 2006, 2007 Arno Onken
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see .

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{m}, @var{v}] =} poisstat (@var{lambda})
## Compute mean and variance of the Poisson distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{lambda} is the parameter of the Poisson distribution. The
## elements of @var{lambda} must be positive
## @end itemize
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{m} is the mean of the Poisson distribution
##
## @item
## @var{v} is the variance of the Poisson distribution
## @end itemize
##
## @subheading Example
##
## @example
## @group
## lambda = 1 ./ (1:6);
## [m, v] = poisstat (lambda)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Moments of the Poisson distribution

function [m, v] = poisstat (lambda)

  ## Exactly one input is accepted.
  if (nargin != 1)
    print_usage ();
  endif

  ## An empty lambda is let through; anything else must pass ismatrix.
  if (! (isempty (lambda) || ismatrix (lambda)))
    error ("poisstat: lambda must be a numeric matrix");
  endif

  ## Mean and variance of the Poisson distribution both equal lambda.
  m = lambda;
  v = lambda;

  ## Entries that are not strictly positive and finite (this includes NaN)
  ## yield NaN for both moments.
  invalid = ! (lambda > 0) | ! (lambda < Inf);
  m(invalid) = NaN;
  v(invalid) = NaN;

endfunction

%!test
%! lambda = 1 ./ (1:6);
%! [m, v] = poisstat (lambda);
%! assert (m, lambda);
%!
assert (v, lambda); statistics-1.3.0/inst/princomp.m0000755000000000000000000001242112776476211015035 0ustar 00000000000000## Copyright (C) 2013 Fernando Damian Nieuwveldt ## ## This program is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License ## as published by the Free Software Foundation; either version 3 ## of the License, or (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{COEFF}]} = princomp(@var{X}) ## @deftypefnx {Function File} {[@var{COEFF},@var{SCORE}]} = princomp(@var{X}) ## @deftypefnx {Function File} {[@var{COEFF},@var{SCORE},@var{latent}]} = princomp(@var{X}) ## @deftypefnx {Function File} {[@var{COEFF},@var{SCORE},@var{latent},@var{tsquare}]} = princomp(@var{X}) ## @deftypefnx {Function File} {[...]} = princomp(@var{X},'econ') ## @itemize @bullet ## @item ## princomp performs principal component analysis on a NxP data matrix X ## @item ## @var{COEFF} : returns the principal component coefficients ## @item ## @var{SCORE} : returns the principal component scores, the representation of X ## in the principal component space ## @item ## @var{LATENT} : returns the principal component variances, i.e., the ## eigenvalues of the covariance matrix X. ## @item ## @var{TSQUARE} : returns Hotelling's T-squared Statistic for each observation in X ## @item ## [...] = princomp(X,'econ') returns only the elements of latent that are not ## necessarily zero, and the corresponding columns of COEFF and SCORE, that is, ## when n <= p, only the first n-1. This can be significantly faster when p is ## much larger than n. 
In this case the svd will be applied on the transpose of
## the data matrix X
##
## @end itemize
##
## @subheading References
##
## @enumerate
## @item
## Jolliffe, I. T., Principal Component Analysis, 2nd Edition, Springer, 2002
##
## @end enumerate
## @end deftypefn

function [COEFF,SCORE,latent,tsquare] = princomp(X,varargin)

  if (nargin < 1 || nargin > 2)
    print_usage ();
  endif

  if (nargin == 2 && ! strcmpi (varargin{:}, "econ"))
    error ("princomp: if a second input argument is present, it must be the string 'econ'");
  endif

  ## After the check above a second argument can only be "econ".
  econ = (nargin == 2);

  [nobs nvars] = size(X);

  ## Center the columns to mean zero.  The dimension is explicit so that a
  ## single observation is centered per variable.
  Xcentered = bsxfun (@minus, X, mean (X, 1));

  ## Check if there are more variables than observations
  if (nvars <= nobs)
    [U,S,COEFF] = svd (Xcentered, "econ");
  elseif (econ)
    ## Calculate the svd on the transpose matrix, much faster
    [COEFF,S,V] = svd (Xcentered', "econ");
  else
    [COEFF,S,V] = svd (Xcentered');
  endif

  ## Scores and their rank are computed for every call, so that the "econ"
  ## trimming of COEFF does not depend on how many outputs were requested.
  SCORE = Xcentered*COEFF;

  ## Get the rank of the SCORE matrix
  r = rank (SCORE);

  ## Only use the first r columns, pad rest with zeros if economy != 'econ'
  SCORE = SCORE(:,1:r);
  if (econ)
    COEFF = COEFF(:,1:r);
  else
    SCORE = [SCORE, zeros(nobs, nvars-r)];
  endif

  if (nargout > 2)
    ## This is the same as the eigenvalues of the covariance matrix of X
    latent = (diag (S'*S) / (size (Xcentered,1) - 1))(1:r);
    if (! econ)
      latent = [latent; zeros(nvars-r, 1)];
    endif
  endif

  if (nargout > 3)
    ## Calculate the Hotelling T-Square statistic for the observations
    tsquare = sumsq (zscore (SCORE(:,1:r)), 2);
  endif

endfunction

%!shared COEFF,SCORE,latent,tsquare,m,x,R,V,lambda,i,S,F

#NIST Engineering Statistics Handbook example (6.5.5.2)
%!test
%! x=[7 4 3
%! 4 1 8
%! 6 3 5
%! 8 6 1
%! 8 5 7
%! 7 2 9
%! 5 3 3
%! 9 5 8
%! 7 4 5
%! 8 2 2];
%! R = corrcoef (x);
%! [V, lambda] = eig (R);
%! [~, i] = sort(diag(lambda), "descend"); #arrange largest PC first
%!
S = V(:, i) * diag(sqrt(diag(lambda)(i))); %!assert(diag(S(:, 1:2)*S(:, 1:2)'), [0.8662; 0.8420; 0.9876], 1E-4); #contribution of first 2 PCs to each original variable %! B = V(:, i) * diag( 1./ sqrt(diag(lambda)(i))); %! F = zscore(x)*B; %! [COEFF,SCORE,latent,tsquare] = princomp(zscore(x, 1)); %!assert(tsquare,sumsq(F, 2),1E4*eps); %!test %! x=[1,2,3;2,1,3]'; %! [COEFF,SCORE,latent,tsquare] = princomp(x); %! m=[sqrt(2),sqrt(2);sqrt(2),-sqrt(2);-2*sqrt(2),0]/2; %! m(:,1) = m(:,1)*sign(COEFF(1,1)); %! m(:,2) = m(:,2)*sign(COEFF(1,2)); %!assert(COEFF,m(1:2,:),10*eps); %!assert(SCORE,-m,10*eps); %!assert(latent,[1.5;.5],10*eps); %!assert(tsquare,[2;2;2],10*eps); %!test %! x=x'; %! [COEFF,SCORE,latent,tsquare] = princomp(x); %! m=[sqrt(2),sqrt(2),0;-sqrt(2),sqrt(2),0;0,0,2]/2; %! m(:,1) = m(:,1)*sign(COEFF(1,1)); %! m(:,2) = m(:,2)*sign(COEFF(1,2)); %! m(:,3) = m(:,3)*sign(COEFF(3,3)); %!assert(COEFF,m,10*eps); %!assert(SCORE(:,1),-m(1:2,1),10*eps); %!assert(SCORE(:,2:3),zeros(2),10*eps); %!assert(latent,[1;0;0],10*eps); %!assert(tsquare,[1;1],10*eps) %!test %! [COEFF,SCORE,latent,tsquare] = princomp(x, "econ"); %!assert(COEFF,m(:, 1),10*eps); %!assert(SCORE,-m(1:2,1),10*eps); %!assert(latent,[1],10*eps); %!assert(tsquare,[1;1],10*eps) statistics-1.3.0/inst/private/tbl_delim.m0000755000000000000000000000614412776476211016620 0ustar 00000000000000## Copyright (C) 2008 Bill Denney ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. 
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{d}, @var{err}] = } tbl_delim (@var{d})
## Return the delimiter for tblread or tblwrite.
##
## The delimiter, @var{d} may be any single character or
## @itemize
## @item "space" " " (default)
## @item "tab" "\t"
## @item "comma" ","
## @item "semi" ";"
## @item "bar" "|"
## @end itemize
##
## @var{err} will be empty if there is no error, and @var{d} will be NaN
## if there is an error.  You MUST check the value of @var{err}.
## @seealso{tblread, tblwrite}
## @end deftypefn

function [d, err] = tbl_delim (d)

  ## Exactly one argument is accepted
  if (nargin != 1)
    print_usage ();
  endif

  err = "";

  if (! ischar (d))
    ## Anything that is not a string is rejected outright
    err = "delimiter must be a character";
    d = NaN;
  else
    ## Expand escape sequences such as '\t' or '\r'
    d = sprintf (d);
    if (numel (d) > 1)
      ## More than one character: it has to be one of the word forms
      words = struct ("space", " ", "tab", "\t", "comma", ",", ...
                      "semi", ";", "bar", "|");
      names = fieldnames (words);
      if (ismember (d, names))
        d = words.(d);
      else
        err = ["tblread: delimiter must be either a single " ...
               "character or one of\n" ...
               sprintf("%s, ", names{:})(1:end-2)];
        d = NaN;
      endif
    endif
  endif

  ## An empty string (possibly after escape processing) is not a delimiter
  if (isempty (d))
    err = "the delimiter may not be empty";
    d = NaN;
  endif

endfunction

## Tests
## The defaults
%!test
%! [d err] = tbl_delim (" ");
%! assert (d, " ");
%! assert (err, "");
## Named delimiters
%!test
%! [d err] = tbl_delim ("space");
%! assert (d, " ");
%! assert (err, "");
%!test
%! [d err] = tbl_delim ("tab");
%! assert (d, sprintf ("\t"));
%! assert (err, "");
%!test
%! [d err] = tbl_delim ("comma");
%! assert (d, ",");
%! assert (err, "");
%!test
%! [d err] = tbl_delim ("semi");
%! assert (d, ";");
%! assert (err, "");
%!test
%! [d err] = tbl_delim ("bar");
%! assert (d, "|");
%! assert (err, "");
## An arbitrary character
%!test
%! [d err] = tbl_delim ("x");
%! assert (d, "x");
%! assert (err, "");
## An arbitrary escape string
%!test
%!
[d err] = tbl_delim ('\r'); %! assert (d, sprintf ('\r')) %! assert (err, ""); ## Errors %!test %! [d err] = tbl_delim ("bars"); %! assert (isnan (d)); %! assert (! isempty (err)); %!test %! [d err] = tbl_delim (""); %! assert (isnan (d)); %! assert (! isempty (err)); %!test %! [d err] = tbl_delim (5); %! assert (isnan (d)); %! assert (! isempty (err)); %!test %! [d err] = tbl_delim ({"."}); %! assert (isnan (d)); %! assert (! isempty (err)); statistics-1.3.0/inst/qrandn.m0000755000000000000000000000533712776476211014501 0ustar 00000000000000## Copyright (C) 2014 - Juan Pablo Carbajal ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program. If not, see <http://www.gnu.org/licenses/>. ## Author: Juan Pablo Carbajal ## -*- texinfo -*- ## @deftypefn {Function File} {@var{z} =} qrandn (@var{q}, @var{r},@var{c}) ## @deftypefnx {Function File} {@var{z} =} qrandn (@var{q}, [@var{r},@var{c}]) ## Returns random deviates drawn from a q-Gaussian distribution. ## ## Parameter @var{q} characterizes the q-Gaussian distribution. ## The result has the size indicated by @var{r} and @var{c}. ## ## Reference: ## W. Thistleton, J. A. Marsh, K. Nelson, C.
## Tsallis (2006)
## "Generalized Box-Muller method for generating q-Gaussian random deviates"
## arXiv:cond-mat/0605570 http://arxiv.org/abs/cond-mat/0605570
##
## @seealso{rand, randn}
## @end deftypefn

function z = qrandn(q,R,C=[])
  ## Draw q-Gaussian deviates with the generalized Box-Muller method.
  ##
  ## Inputs:
  ##   q - scalar shape parameter, must be strictly lower than 3
  ##   R - number of rows, or a size vector when it has more than one element
  ##   C - optional number of columns (used only when R is a single number)
  ## Output:
  ##   z - array of deviates of size R (vector form), [R,1] or [R,C]

  if (nargin < 2)
    print_usage ();
  endif

  if (! isscalar (q))
    error ('Octave:invalid-input-arg', 'The parameter q must be a scalar.');
  endif

  ## Check that q < 3.  q == 3 must be rejected as well: it makes the
  ## denominator of qGen below zero and the result would be all NaN.
  if (q >= 3)
    error ('Octave:invalid-input-arg', 'The parameter q must be lower than 3.');
  endif

  ## Work out the requested output size
  if (numel (R) > 1)
    S = R;
  elseif (numel (R) == 1)
    if (isempty (C))
      S = [R, 1];
    else
      S = [R, C];
    endif
  else
    ## An empty size used to fall through and fail with "S undefined"
    error ('Octave:invalid-input-arg', 'The size argument must not be empty.');
  endif

  ## Calculate the q to be used on the q-log
  qGen = (1 + q) / (3 - q);

  ## Generalized Box-Muller transform: q-log radius times a uniform phase
  z = sqrt (-2 * log_q (rand (S), qGen)) .* sin (2*pi*rand (S));

endfunction

function a = log_q (x,q)
  ##
  ## Returns the q-log of x, using q
  ##
  dq = 1 - q;

  ## Check to see if q = 1 (to double precision)
  if (abs (dq) < 10*eps)
    ## If q is 1, use the usual natural logarithm
    a = log (x);
  else
    ## If q differs from 1, use the definition of the q-log
    a = (x .^ dq - 1) ./ dq;
  endif

endfunction

%!demo
%! z = qrandn (-5, 5e6);
%! [c x] = hist (z,linspace(-1.5,1.5,200),1);
%! figure(1)
%! plot(x,c,"r."); axis tight; axis([-1.5,1.5]);
%!
%! z = qrandn (-0.14286, 5e6);
%! [c x] = hist (z,linspace(-2,2,200),1);
%! figure(2)
%! plot(x,c,"r."); axis tight; axis([-2,2]);
%!
%! z = qrandn (2.75, 5e6);
%! [c x] = hist (z,linspace(-1e3,1e3,1e3),1);
%! figure(3)
%! semilogy(x,c,"r."); axis tight; axis([-100,100]);
%!
%! # ---------
%! # Figures from the reference paper.
statistics-1.3.0/inst/random.m0000755000000000000000000001546212776476211014476 0ustar 00000000000000
## Copyright (C) 2007 Soren Hauberg
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see <http://www.gnu.org/licenses/>. ## -*- texinfo -*- ## @deftypefn {Function File} @var{r} = random(@var{name}, @var{arg1}) ## @deftypefnx{Function File} @var{r} = random(@var{name}, @var{arg1}, @var{arg2}) ## @deftypefnx{Function File} @var{r} = random(@var{name}, @var{arg1}, @var{arg2}, @var{arg3}) ## @deftypefnx{Function File} @var{r} = random(@var{name}, ..., @var{s1}, ...) ## Generates pseudo-random numbers from a given one-, two-, or three-parameter ## distribution. ## ## The variable @var{name} must be a string that names the distribution from ## which to sample. If this distribution is a one-parameter distribution @var{arg1} ## should be supplied, if it is a two-parameter distribution @var{arg2} must also ## be supplied, and if it is a three-parameter distribution @var{arg3} must also ## be present. Any arguments following the distribution parameters will determine ## the size of the result. ## ## As an example, the following code generates a 10 by 20 matrix containing ## random numbers from a normal distribution with mean 5 and standard deviation ## 2. ## @example ## R = random("normal", 5, 2, [10, 20]); ## @end example ## ## The variable @var{name} can be one of the following strings ## ## @table @asis ## @item "beta" ## @itemx "beta distribution" ## Samples are drawn from the Beta distribution. ## @item "bino" ## @itemx "binomial" ## @itemx "binomial distribution" ## Samples are drawn from the Binomial distribution. ## @item "chi2" ## @itemx "chi-square" ## @itemx "chi-square distribution" ## Samples are drawn from the Chi-Square distribution.
## @item "exp" ## @itemx "exponential" ## @itemx "exponential distribution" ## Samples are drawn from the Exponential distribution. ## @item "f" ## @itemx "f distribution" ## Samples are drawn from the F distribution. ## @item "gam" ## @itemx "gamma" ## @itemx "gamma distribution" ## Samples are drawn from the Gamma distribution. ## @item "geo" ## @itemx "geometric" ## @itemx "geometric distribution" ## Samples are drawn from the Geometric distribution. ## @item "hyge" ## @itemx "hypergeometric" ## @itemx "hypergeometric distribution" ## Samples are drawn from the Hypergeometric distribution. ## @item "logn" ## @itemx "lognormal" ## @itemx "lognormal distribution" ## Samples are drawn from the Log-Normal distribution. ## @item "nbin" ## @itemx "negative binomial" ## @itemx "negative binomial distribution" ## Samples are drawn from the Negative Binomial distribution. ## @item "norm" ## @itemx "normal" ## @itemx "normal distribution" ## Samples are drawn from the Normal distribution. ## @item "poiss" ## @itemx "poisson" ## @itemx "poisson distribution" ## Samples are drawn from the Poisson distribution. ## @item "rayl" ## @itemx "rayleigh" ## @itemx "rayleigh distribution" ## Samples are drawn from the Rayleigh distribution. ## @item "t" ## @itemx "t distribution" ## Samples are drawn from the T distribution. ## @item "unif" ## @itemx "uniform" ## @itemx "uniform distribution" ## Samples are drawn from the Uniform distribution. ## @item "unid" ## @itemx "discrete uniform" ## @itemx "discrete uniform distribution" ## Samples are drawn from the Uniform Discrete distribution. ## @item "wbl" ## @itemx "weibull" ## @itemx "weibull distribution" ## Samples are drawn from the Weibull distribution. 
## @end table
## @seealso{rand, betarnd, binornd, chi2rnd, exprnd, frnd, gamrnd, geornd, hygernd,
## lognrnd, nbinrnd, normrnd, poissrnd, raylrnd, trnd, unifrnd, unidrnd, wblrnd}
## @end deftypefn

function retval = random(name, varargin)
  ## Dispatch to the random number generator of the distribution NAME.
  ##
  ## Inputs:
  ##   name     - distribution name (string, matched case-insensitively)
  ##   varargin - distribution parameters followed by optional size
  ##              arguments; passed unchanged to the underlying *rnd function
  ## Output:
  ##   retval   - whatever the selected *rnd function returns
  ## Errors are raised for a non-string name, for distributions without a
  ## generator, and for unknown names.
  ##
  ## In addition to the names listed in the help text, "gev" and "gp" (with
  ## their long forms) are forwarded to gevrnd and gprnd, which are shipped
  ## with this package.

  ## General input checking
  if (nargin < 2)
    print_usage();
  endif
  if (!ischar(name))
    error("random: first input argument must be a string");
  endif

  ## Select distribution
  switch (lower(name))
    case {"beta", "beta distribution"}
      retval = betarnd(varargin{:});
    case {"bino", "binomial", "binomial distribution"}
      retval = binornd(varargin{:});
    case {"chi2", "chi-square", "chi-square distribution"}
      retval = chi2rnd(varargin{:});
    case {"exp", "exponential", "exponential distribution"}
      retval = exprnd(varargin{:});
    case {"ev", "extreme value", "extreme value distribution"}
      error("random: distribution type '%s' is not yet implemented", name);
    case {"f", "f distribution"}
      retval = frnd(varargin{:});
    case {"gam", "gamma", "gamma distribution"}
      retval = gamrnd(varargin{:});
    case {"gev", "generalized extreme value", "generalized extreme value distribution"}
      retval = gevrnd(varargin{:});
    case {"gp", "generalized pareto", "generalized pareto distribution"}
      retval = gprnd(varargin{:});
    case {"geo", "geometric", "geometric distribution"}
      retval = geornd(varargin{:});
    case {"hyge", "hypergeometric", "hypergeometric distribution"}
      retval = hygernd(varargin{:});
    case {"logn", "lognormal", "lognormal distribution"}
      retval = lognrnd(varargin{:});
    case {"nbin", "negative binomial", "negative binomial distribution"}
      retval = nbinrnd(varargin{:});
    case {"ncf", "noncentral f", "noncentral f distribution"}
      error("random: distribution type '%s' is not yet implemented", name);
    case {"nct", "noncentral t", "noncentral t distribution"}
      error("random: distribution type '%s' is not yet implemented", name);
    case {"ncx2", "noncentral chi-square", "noncentral chi-square distribution"}
      error("random: distribution type '%s' is not yet implemented", name);
    case {"norm", "normal", "normal distribution"}
      retval = normrnd(varargin{:});
    case {"poiss", "poisson", "poisson distribution"}
      retval = poissrnd(varargin{:});
    case {"rayl", "rayleigh", "rayleigh distribution"}
      retval = raylrnd(varargin{:});
    case {"t", "t distribution"}
      retval = trnd(varargin{:});
    case {"unif", "uniform", "uniform distribution"}
      retval = unifrnd(varargin{:});
    case {"unid", "discrete uniform", "discrete uniform distribution"}
      retval = unidrnd(varargin{:});
    case {"wbl", "weibull", "weibull distribution"}
      retval = wblrnd(varargin{:});
    otherwise
      error("random: unsupported distribution type '%s'", name);
  endswitch

endfunction
statistics-1.3.0/inst/randsample.m0000755000000000000000000001046012776476211015335 0ustar 00000000000000
## Copyright (C) 2014 - Nir Krakauer
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.

## Author: Nir Krakauer

## -*- texinfo -*-
## @deftypefn {Function File} {@var{y} =} randsample (@var{v}, @var{k}, @var{replacement}=false [, @var{w}])
## Elements sampled from a vector.
##
## Returns @var{k} random elements from a vector @var{v} with @var{n} elements, sampled without or with @var{replacement}.
##
## If @var{v} is a scalar, samples from 1:@var{v}.
##
## If a weight vector @var{w} of the same size as @var{v} is specified, the probability of each element being sampled is proportional to @var{w}.
## Unlike Matlab's function of the same name, this can be done for sampling with or without replacement.
##
## Randomization is performed using rand().
##
## @seealso{randperm}
## @end deftypefn

function y = randsample(v,k,replacement=false,w=[])
  ## Draw K elements from V (or from 1:V when V is a real scalar).
  ##
  ## Inputs:
  ##   v           - vector to sample from, or scalar n meaning 1:n
  ##   k           - number of elements to draw
  ##   replacement - true to sample with replacement (default false)
  ##   w           - optional weights, one per element; must be non-negative
  ## Output:
  ##   y           - the K sampled elements; a row vector for scalar V,
  ##                 otherwise indexed out of V

  if (isscalar (v) && isreal (v))
    n = v;
    vector_v = false;
  elseif (isvector (v))
    n = numel (v);
    vector_v = true;
  else
    error ('Octave:invalid-input-arg', 'randsample: The input v must be a vector or positive integer.');
  endif

  if (k < 0 || (k > n && ! replacement))
    error ('Octave:invalid-input-arg', 'randsample: The input k must be a non-negative integer. Sampling without replacement needs k <= n.');
  endif

  if (all (length (w) != [0, n]))
    error ('Octave:invalid-input-arg', 'randsample: the size w (%d) must match the first argument (%d)', length(w), n);
  endif

  ## Validate the weights before any draw is attempted.  Without these checks
  ## an all-zero or too sparse weight vector made the rejection loop below
  ## spin forever on NaN probabilities.
  if (! isempty (w) && k > 0)
    if (any (w(:) < 0) || ! (sum (w(:)) > 0))
      error ('Octave:invalid-input-arg', 'randsample: the weights w must be non-negative and not all zero.');
    endif
    if (! replacement && nnz (w) < k)
      error ('Octave:invalid-input-arg', 'randsample: sampling without replacement needs at least k elements with non-zero weight.');
    endif
  endif

  if (replacement) # sample with replacement
    if (isempty (w)) # all elements are equally likely to be sampled
      ## floor()+1 maps rand's open interval (0,1) onto 1..n; the former
      ## round (n*rand + 0.5) could round up to n+1 for rand close to 1.
      y = floor (n * rand (1, k)) + 1;
    else
      y = weighted_replacement (k, w);
    endif

  else # sample without replacement
    if (isempty (w)) # all elements are equally likely to be sampled
      y = randperm (n, k);
    else # use "accept-reject"-like sampling
      y = weighted_replacement (k, w);
      while (1)
        [yy, idx] = sort (y); # Note: sort keeps order of equal elements.
        Idup = [false, (diff (yy)==0)];
        if (! any (Idup))
          break
        else
          Idup(idx) = Idup; # find duplicates in original vector
          w(y) = 0;         # don't permit resampling
          # remove duplicates, then sample again
          y = [y(~Idup), (weighted_replacement (sum (Idup), w))];
        endif
      endwhile
    endif
  endif

  if (vector_v)
    y = v(y);
  endif

endfunction

function y = weighted_replacement (k, w)
  ## Draw K indices into W with probability proportional to W.
  ##
  ## The lower bin edges are the running sums of W without the final total,
  ## and the uniform draws are scaled by that total.  Every draw therefore
  ## lands in a bin 1..numel(w), and elements of zero weight get an empty bin.
  c = cumsum (w(:))';
  edges = [0, c(1:end-1)];
  # distribute k uniform random deviates based on the given weighting
  u = c(end) * rand (1, k);
  y = arrayfun (@(x) find (edges <= x, 1, "last"), u);
endfunction

%!test
%! n = 20;
%! k = 5;
%! x = randsample(n, k);
%! assert (size(x), [1 k]);
%! x = randsample(n, k, true);
%! assert (size(x), [1 k]);
%! x = randsample(n, k, false);
%! assert (size(x), [1 k]);
%!
x = randsample(n, k, true, ones(n, 1)); %! assert (size(x), [1 k]); %! x = randsample(1:n, k); %! assert (size(x), [1 k]); %! x = randsample(1:n, k, true); %! assert (size(x), [1 k]); %! x = randsample(1:n, k, false); %! assert (size(x), [1 k]); %! x = randsample(1:n, k, true, ones(n, 1)); %! assert (size(x), [1 k]); %! x = randsample((1:n)', k); %! assert (size(x), [k 1]); %! x = randsample((1:n)', k, true); %! assert (size(x), [k 1]); %! x = randsample((1:n)', k, false); %! assert (size(x), [k 1]); %! x = randsample((1:n)', k, true, ones(n, 1)); %! assert (size(x), [k 1]); %! n = 10; %! k = 100; %! x = randsample(n, k, true, 1:n); %! assert (size(x), [1 k]); %! x = randsample((1:n)', k, true); %! assert (size(x), [k 1]); %! x = randsample(k, k, false, 1:k); %! assert (size(x), [1 k]); statistics-1.3.0/inst/raylcdf.m0000755000000000000000000000617412776476211014642 0ustar 00000000000000## Copyright (C) 2006, 2007 Arno Onken ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{p} =} raylcdf (@var{x}, @var{sigma}) ## Compute the cumulative distribution function of the Rayleigh ## distribution. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{x} is the support. The elements of @var{x} must be non-negative. ## ## @item ## @var{sigma} is the parameter of the Rayleigh distribution. The elements ## of @var{sigma} must be positive. 
## @end itemize
## @var{x} and @var{sigma} must be of common size or one of them must be
## scalar.
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{p} is the cumulative distribution of the Rayleigh distribution at
## each element of @var{x} and corresponding parameter @var{sigma}.
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## x = 0:0.5:2.5;
## sigma = 1:6;
## p = raylcdf (x, sigma)
## @end group
##
## @group
## p = raylcdf (x, 0.5)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. pages 104 and 148, McGraw-Hill, New York, second edition,
## 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: CDF of the Rayleigh distribution

function p = raylcdf (x, sigma)
  # Rayleigh CDF, p = 1 - exp (-x^2 / (2 sigma^2)), evaluated element-wise.
  # x and sigma must be scalar or of common size.  Elements with x < 0,
  # non-finite x, or sigma <= 0 (NaN included) yield NaN.

  # Check arguments
  if (nargin != 2)
    print_usage ();
  endif

  if (! isempty (x) && ! ismatrix (x))
    error ("raylcdf: x must be a numeric matrix");
  endif
  if (! isempty (sigma) && ! ismatrix (sigma))
    error ("raylcdf: sigma must be a numeric matrix");
  endif

  if (! isscalar (x) || ! isscalar (sigma))
    [retval, x, sigma] = common_size (x, sigma);
    if (retval > 0)
      error ("raylcdf: x and sigma must be of common size or scalar");
    endif
  endif

  # Calculate cdf.  expm1 evaluates exp (t) - 1 without cancellation, so tiny
  # probabilities (x << sigma) are not flushed to zero as with 1 - exp (t).
  p = -expm1 ((-x .^ 2) ./ (2 * sigma .^ 2));

  # Continue argument check
  k = find (! (x >= 0) | ! (x < Inf) | ! (sigma > 0));
  if (any (k))
    p(k) = NaN;
  endif

endfunction

%!test
%! x = 0:0.5:2.5;
%! sigma = 1:6;
%! p = raylcdf (x, sigma);
%! expected_p = [0.0000, 0.0308, 0.0540, 0.0679, 0.0769, 0.0831];
%! assert (p, expected_p, 0.001);

%!test
%! x = 0:0.5:2.5;
%! p = raylcdf (x, 0.5);
%! expected_p = [0.0000, 0.3935, 0.8647, 0.9889, 0.9997, 1.0000];
%!
assert (p, expected_p, 0.001); statistics-1.3.0/inst/raylinv.m0000755000000000000000000000636312776476211014702 0ustar 00000000000000## Copyright (C) 2006, 2007 Arno Onken ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{x} =} raylinv (@var{p}, @var{sigma}) ## Compute the quantile of the Rayleigh distribution. The quantile is the ## inverse of the cumulative distribution function. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{p} is the cumulative distribution. The elements of @var{p} must be ## probabilities. ## ## @item ## @var{sigma} is the parameter of the Rayleigh distribution. The elements ## of @var{sigma} must be positive. ## @end itemize ## @var{p} and @var{sigma} must be of common size or one of them must be ## scalar. ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{x} is the quantile of the Rayleigh distribution at each element of ## @var{p} and corresponding parameter @var{sigma}. ## @end itemize ## ## @subheading Examples ## ## @example ## @group ## p = 0:0.1:0.5; ## sigma = 1:6; ## x = raylinv (p, sigma) ## @end group ## ## @group ## x = raylinv (p, 0.5) ## @end group ## @end example ## ## @subheading References ## ## @enumerate ## @item ## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics ## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC, ## 2001. 
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. pages 104 and 148, McGraw-Hill, New York, second edition,
## 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Quantile of the Rayleigh distribution

function x = raylinv (p, sigma)
  # Rayleigh quantile, x = sigma * sqrt (-2 log (1 - p)), element-wise.
  # p and sigma must be scalar or of common size.  p == 1 maps to Inf;
  # elements with p outside [0, 1] or sigma <= 0 (NaN included) yield NaN.

  # Check arguments
  if (nargin != 2)
    print_usage ();
  endif

  if (! isempty (p) && ! ismatrix (p))
    error ("raylinv: p must be a numeric matrix");
  endif
  if (! isempty (sigma) && ! ismatrix (sigma))
    error ("raylinv: sigma must be a numeric matrix");
  endif

  if (! isscalar (p) || ! isscalar (sigma))
    [retval, p, sigma] = common_size (p, sigma);
    if (retval > 0)
      error ("raylinv: p and sigma must be of common size or scalar");
    endif
  endif

  # Calculate quantile.  log1p (-p) keeps full precision for tiny p, where
  # 1 - p rounds to 1 and log (1 - p) would collapse to zero.
  x = sqrt (-2 .* log1p (-p) .* sigma .^ 2);

  k = find (p == 1);
  if (any (k))
    x(k) = Inf;
  endif

  # Continue argument check
  k = find (! (p >= 0) | ! (p <= 1) | ! (sigma > 0));
  if (any (k))
    x(k) = NaN;
  endif

endfunction

%!test
%! p = 0:0.1:0.5;
%! sigma = 1:6;
%! x = raylinv (p, sigma);
%! expected_x = [0.0000, 0.9181, 2.0041, 3.3784, 5.0538, 7.0645];
%! assert (x, expected_x, 0.001);

%!test
%! p = 0:0.1:0.5;
%! x = raylinv (p, 0.5);
%! expected_x = [0.0000, 0.2295, 0.3340, 0.4223, 0.5054, 0.5887];
%! assert (x, expected_x, 0.001);
statistics-1.3.0/inst/raylpdf.m0000755000000000000000000000620312776476211014650 0ustar 00000000000000
## Copyright (C) 2006, 2007 Arno Onken
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{y} =} raylpdf (@var{x}, @var{sigma})
## Compute the probability density function of the Rayleigh distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{x} is the support. The elements of @var{x} must be non-negative.
##
## @item
## @var{sigma} is the parameter of the Rayleigh distribution. The elements
## of @var{sigma} must be positive.
## @end itemize
## @var{x} and @var{sigma} must be of common size or one of them must be
## scalar.
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{y} is the probability density of the Rayleigh distribution at each
## element of @var{x} and corresponding parameter @var{sigma}.
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## x = 0:0.5:2.5;
## sigma = 1:6;
## y = raylpdf (x, sigma)
## @end group
##
## @group
## y = raylpdf (x, 0.5)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. pages 104 and 148, McGraw-Hill, New York, second edition,
## 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: PDF of the Rayleigh distribution

function y = raylpdf (x, sigma)
  # Rayleigh density, y = x / sigma^2 * exp (-x^2 / (2 sigma^2)), element-wise.
  # Elements with x < 0, non-finite x, or sigma <= 0 (NaN included) give NaN.

  # Exactly two arguments are required
  if (nargin != 2)
    print_usage ();
  endif

  # Both arguments must be empty or matrices
  if (! (isempty (x) || ismatrix (x)))
    error ("raylpdf: x must be a numeric matrix");
  endif
  if (! (isempty (sigma) || ismatrix (sigma)))
    error ("raylpdf: sigma must be a numeric matrix");
  endif

  # Bring the arguments to a common size unless both are scalars
  if (! (isscalar (x) && isscalar (sigma)))
    [status, x, sigma] = common_size (x, sigma);
    if (status > 0)
      error ("raylpdf: x and sigma must be of common size or scalar");
    endif
  endif

  # Evaluate the density
  sigma_sq = sigma .^ 2;
  y = x .* exp (-(x .^ 2) ./ (2 .* sigma_sq)) ./ sigma_sq;

  # Anything outside the domain of the distribution is reported as NaN
  valid = (x >= 0) & (x < Inf) & (sigma > 0);
  y(! valid) = NaN;

endfunction

%!test
%! x = 0:0.5:2.5;
%! sigma = 1:6;
%! y = raylpdf (x, sigma);
%! expected_y = [0.0000, 0.1212, 0.1051, 0.0874, 0.0738, 0.0637];
%! assert (y, expected_y, 0.001);

%!test
%! x = 0:0.5:2.5;
%! y = raylpdf (x, 0.5);
%! expected_y = [0.0000, 1.2131, 0.5413, 0.0667, 0.0027, 0.0000];
%! assert (y, expected_y, 0.001);
statistics-1.3.0/inst/raylrnd.m0000755000000000000000000001053512776476211014665 0ustar 00000000000000
## Copyright (C) 2006, 2007 Arno Onken
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{x} =} raylrnd (@var{sigma})
## @deftypefnx {Function File} {@var{x} =} raylrnd (@var{sigma}, @var{sz})
## @deftypefnx {Function File} {@var{x} =} raylrnd (@var{sigma}, @var{r}, @var{c})
## Generate a matrix of random samples from the Rayleigh distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{sigma} is the parameter of the Rayleigh distribution. The elements
## of @var{sigma} must be positive.
##
## @item
## @var{sz} is the size of the matrix to be generated.
function x = raylrnd (sigma, r, c)

  ## Random samples from the Rayleigh distribution with parameter SIGMA.
  ##
  ##   raylrnd (sigma)        result has the size of SIGMA
  ##   raylrnd (sigma, sz)    result has size SZ (vector of non-negative integers)
  ##   raylrnd (sigma, r, c)  result is R-by-C (non-negative integer scalars)
  ##
  ## A non-scalar, non-empty SIGMA must already have the requested size.
  ## Entries whose SIGMA is not positive are NaN; an empty SIGMA gives [].

  ## Determine the output size and check SIGMA against it.
  switch (nargin)
    case 1
      sz = size (sigma);

    case 2
      if (! isvector (r) || any ((r < 0) | round (r) != r))
        error ("raylrnd: sz must be a vector of non-negative integers")
      endif
      sz = r(:)';
      if (! (isscalar (sigma) || isempty (sigma)))
        dims = size (sigma);
        if (length (dims) != length (sz) || any (dims != sz))
          error ("raylrnd: sigma must be scalar or of size sz");
        endif
      endif

    case 3
      if (! isscalar (r) || any ((r < 0) | round (r) != r))
        error ("raylrnd: r must be a non-negative integer")
      endif
      if (! isscalar (c) || any ((c < 0) | round (c) != c))
        error ("raylrnd: c must be a non-negative integer")
      endif
      sz = [r, c];
      if (! (isscalar (sigma) || isempty (sigma)))
        dims = size (sigma);
        if (length (dims) != length (sz) || any (dims != sz))
          error ("raylrnd: sigma must be scalar or of size [r, c]");
        endif
      endif

    otherwise
      print_usage ();
  endswitch

  if (! (isempty (sigma) || ismatrix (sigma)))
    error ("raylrnd: sigma must be a numeric matrix");
  endif

  ## Degenerate cases that need no random numbers.
  if (isempty (sigma))
    x = [];
    return;
  endif

  if (isscalar (sigma) && ! (sigma > 0))
    x = NaN (sz);
    return;
  endif

  ## Inverse-CDF sampling from uniform variates.
  x = sqrt (-2 .* log (1 - rand (sz)) .* sigma .^ 2);

  ## Blank out samples that belong to an invalid parameter.
  bad = ! (sigma > 0);
  if (any (bad(:)))
    x(bad) = NaN;
  endif

endfunction
function [m, v] = raylstat (sigma)

  ## Mean M and variance V of the Rayleigh distribution with parameter SIGMA.
  ##
  ## Both outputs have the size of SIGMA.  Entries whose SIGMA is not
  ## positive (NaN included) are returned as NaN.

  if (nargin != 1)
    print_usage ();
  endif

  if (! (isempty (sigma) || ismatrix (sigma)))
    error ("raylstat: sigma must be a numeric matrix");
  endif

  ## Closed-form moments: mean = sigma * sqrt (pi/2), var = (2 - pi/2) * sigma^2.
  m = sigma .* sqrt (pi ./ 2);
  v = (2 - pi ./ 2) .* sigma .^ 2;

  ## Invalid parameters have no moments.
  bad = ! (sigma > 0);
  if (any (bad(:)))
    m(bad) = NaN;
    v(bad) = NaN;
  endif

endfunction
assert (v, expected_v, 0.001); statistics-1.3.0/inst/regress.m0000755000000000000000000001521112776476211014660 0ustar 00000000000000## Copyright (C) 2005, 2006 William Poetra Yoga Hadisoeseno ## Copyright (C) 2011 Nir Krakauer ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{b}, @var{bint}, @var{r}, @var{rint}, @var{stats}] =} regress (@var{y}, @var{X}, [@var{alpha}]) ## Multiple Linear Regression using Least Squares Fit of @var{y} on @var{X} ## with the model @code{y = X * beta + e}. ## ## Here, ## ## @itemize ## @item ## @code{y} is a column vector of observed values ## @item ## @code{X} is a matrix of regressors, with the first column filled with ## the constant value 1 ## @item ## @code{beta} is a column vector of regression parameters ## @item ## @code{e} is a column vector of random errors ## @end itemize ## ## Arguments are ## ## @itemize ## @item ## @var{y} is the @code{y} in the model ## @item ## @var{X} is the @code{X} in the model ## @item ## @var{alpha} is the significance level used to calculate the confidence ## intervals @var{bint} and @var{rint} (see `Return values' below). 
function [b, bint, r, rint, stats] = regress (y, X, alpha)

  ## Multiple linear regression of the column vector Y on the columns of X.
  ##
  ## B is the least-squares coefficient vector.  The remaining outputs are
  ## only computed when requested: BINT (coefficient intervals), R
  ## (residuals), RINT (residual intervals) and STATS = [R^2, F, p, error
  ## variance].  ALPHA is the significance level for the intervals and
  ## defaults to 0.05.  Rows with a NaN in Y or X are discarded first, so R
  ## and RINT refer to the remaining rows only.

  if (nargin < 2 || nargin > 3)
    print_usage;
  endif

  if (! ismatrix (y))
    error ("regress: y must be a numeric matrix");
  elseif (! ismatrix (X))
    error ("regress: X must be a numeric matrix");
  elseif (columns (y) != 1)
    error ("regress: y must be a column vector");
  elseif (rows (y) != rows (X))
    error ("regress: y and X must contain the same number of rows");
  endif

  if (nargin < 3)
    alpha = 0.05;
  elseif (! isscalar (alpha))
    error ("regress: alpha must be a scalar value")
  endif

  ## Keep only the complete observations.
  keep = ! any (isnan ([y X]), 2);
  y = y(keep);
  X = X(keep,:);

  ## Economy QR gives the pseudo-inverse without forming X' * X.
  [Q, R] = qr (X, 0);
  pinv_X = R \ Q';
  b = pinv_X * y;

  if (nargout < 2)
    return;
  endif

  ## Residuals and coefficient confidence intervals.
  [n, p] = size (X);
  dof = n - p;
  t_alpha_2 = tinv (alpha / 2, dof);   # negative quantile

  r = y - X * b;
  SSE = sum (r .^ 2);
  v = SSE / dof;

  ## diag (inv (X' * X)) expressed through the triangular factor only.
  c = diag (inv (R' * R));
  db = t_alpha_2 * sqrt (v * c);
  ## db is negative, so the first column holds the lower bounds.
  bint = [b + db, b - db];

  if (nargout < 4)
    return;
  endif

  ## Residual intervals.
  dof1 = n - p - 1;
  ## Leverages: same as diag (X * pinv_X) without the full product.
  h = sum (X .* pinv_X', 2);
  ## Combined form of
  ##   sigmaihat2 = (SSE - r .^ 2 / (1 - h)) / dof1
  ##   dr = t_alpha_2 * sqrt (sigmaihat2 .* (1 - h))
  dr = t_alpha_2 * sqrt ((SSE * (1 - h) - (r .^ 2)) / dof1);
  rint = [r + dr, r - dr];

  if (nargout < 5)
    return;
  endif

  ## Summary statistics of the full model.
  R2 = 1 - SSE / sum ((y - mean (y)) .^ 2);
  ## Equivalent to (R2 / (p - 1)) / ((1 - R2) / dof).
  F = dof / (p - 1) / (1 / R2 - 1);
  pval = 1 - fcdf (F, p - 1, dof);
  stats = [R2 F pval v];

endfunction
68655 112.6 482704 3813 2552 123366 1959 %! 69564 114.2 502601 3931 2514 125368 1960 %! 69331 115.7 518173 4806 2572 127852 1961 %! 70551 116.9 554894 4007 2827 130081 1962 ]; %! % Results certified by NIST using 500 digit arithmetic %! % b and standard error in b %! V = [ -3482258.63459582 890420.383607373 %! 15.0618722713733 84.9149257747669 %! -0.358191792925910E-01 0.334910077722432E-01 %! -2.02022980381683 0.488399681651699 %! -1.03322686717359 0.214274163161675 %! -0.511041056535807E-01 0.226073200069370 %! 1829.15146461355 455.478499142212 ]; %! Rsq = 0.995479004577296; %! F = 330.285339234588; %! y = Z(:,1); X = [ones(rows(Z),1), Z(:,2:end)]; %! alpha = 0.05; %! [b, bint, r, rint, stats] = regress (y, X, alpha); %! assert(b,V(:,1),3e-6); %! assert(stats(1),Rsq,1e-12); %! assert(stats(2),F,3e-8); %! assert(((bint(:,1)-bint(:,2))/2)/tinv(alpha/2,9),V(:,2),-1.e-5); statistics-1.3.0/inst/regress_gp.m0000755000000000000000000001065012776476211015350 0ustar 00000000000000## Copyright (c) 2012 Juan Pablo Carbajal ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{m}, @var{K}] =} regress_gp (@var{x}, @var{y}, @var{Sp}) ## @deftypefnx {Function File} {[@dots{} @var{yi} @var{dy}] =} regress_gp (@dots{}, @var{xi}) ## Linear scalar regression using gaussian processes. ## ## It estimates the model @var{y} = @var{x}'*m for @var{x} R^D and @var{y} in R. 
function [wm K yi dy] = regress_gp (x,y,Sp=[],xi=[])

  ## Linear regression with a Gaussian prior on the weights.
  ##
  ## X is n-by-D (one observation per row) and Y is n-by-1.  SP is the
  ## (D+1)-by-(D+1) prior covariance of the weights, intercept first; when
  ## empty it defaults to 100*eye (D+1).
  ##
  ## WM is the (D+1)-by-1 posterior mean of the weights (intercept first)
  ## and K their posterior covariance.  When XI (m-by-D) is given, YI is the
  ## model evaluated at XI and DY(i) = xi_i * K * xi_i' for each augmented
  ## row of XI; otherwise both are empty.

  if (isempty (Sp))
    Sp = 100 * eye (size (x, 2) + 1);
  endif

  ## Augmented, transposed design matrix: (D+1)-by-n with a leading row of
  ## ones for the intercept.
  x = [ones(1, size (x, 1)); x'];

  ## Juan Pablo Carbajal
  ## Note that in the book the equation (below 2.11) for A reads
  ##   A = (1/sy^2)*x*x' + inv (Vp);
  ## where sy is the scalar variance of the residuals (i.e. y = x' * w +
  ## epsilon) and epsilon is drawn from N(0,sy^2).  Vp is the variance of
  ## the parameters w.  Note that
  ##   (sy^2 * A)^{-1} = (1/sy^2)*A^{-1} = (x*x' + sy^2 * inv(Vp))^{-1};
  ## and that the formula for the mean of w is
  ##   (1/sy^2)*A^{-1}*x*y
  ## Then one obtains
  ##   inv(x*x' + sy^2 * inv(Vp))*x*y
  ## Looking at the formula below we see that Sp = (1/sy^2)*Vp, making the
  ## regression depend on only one parameter, Sp, and not two.
  A = x*x' + inv (Sp);
  K = inv (A);

  ## Multiply the small (D+1)-vector x*y instead of forming K*x first.
  wm = K * (x*y);

  yi = [];
  dy = [];
  if (! isempty (xi))
    xi = [ones(size (xi, 1), 1) xi];
    yi = xi * wm;
    ## Only the diagonal of xi*K*xi' is wanted; a row-wise product yields it
    ## without building the full m-by-m matrix.
    dy = sum ((xi * K) .* xi, 2);
  endif

endfunction
xi = linspace (-2,2,10)'; %! %! [m K yi dy] = regress_gp (x,y,[],xi); %! %! plot (x,y,'xk',xi,yi,'r-',xi,bsxfun(@plus, yi, [-dy +dy]),'b-'); %!demo %! % 2D Data %! x = 2*rand (4,2)-1; %! y = 2*x(:,1)-3*x(:,2) -1 + 1*randn (4,1); %! %! % Mesh for interpolation/extrapolation %! [xi yi] = meshgrid (linspace (-1,1,10)); %! %! [m K zi dz] = regress_gp (x,y,[],[xi(:) yi(:)]); %! zi = reshape (zi, 10,10); %! dz = reshape (dz,10,10); %! %! plot3 (x(:,1),x(:,2),y,'.g','markersize',8); %! hold on; %! h = mesh (xi,yi,zi,zeros(10,10)); %! set(h,'facecolor','none'); %! h = mesh (xi,yi,zi+dz,ones(10,10)); %! set(h,'facecolor','none'); %! h = mesh (xi,yi,zi-dz,ones(10,10)); %! set(h,'facecolor','none'); %! hold off %! axis tight %! view(80,25) %!demo %! % Projection over basis function %! pp = [2 2 0.3 1]; %! n = 10; %! x = 2*rand (n,1)-1; %! y = polyval(pp,x) + 0.3*randn (n,1); %! %! % Powers %! px = [sqrt(abs(x)) x x.^2 x.^3]; %! %! % Points for interpolation/extrapolation %! xi = linspace (-1,1,100)'; %! pxi = [sqrt(abs(xi)) xi xi.^2 xi.^3]; %! %! Sp = 100*eye(size(px,2)+1); %! Sp(2,2) = 1; # We don't believe the sqrt is present %! [m K yi dy] = regress_gp (px,y,Sp,pxi); %! disp(m) %! %! plot (x,y,'xk;Data;',xi,yi,'r-;Estimation;',xi,polyval(pp,xi),'g-;True;'); %! axis tight %! axis manual %! hold on %! plot (xi,bsxfun(@plus, yi, [-dy +dy]),'b-'); %! hold off statistics-1.3.0/inst/repanova.m0000755000000000000000000001002612776476211015020 0ustar 00000000000000## Copyright (C) 2011 Kyle Winfree ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. 
function [p, table, st] = repanova(varargin)

  % Repeated-measures analysis of variance.
  %
  %   [p, table, st] = repanova (X)
  %   [p, table, st] = repanova (X, cond)
  %   [p, table, st] = repanova (X, cond, 'string' | 'cell')
  %
  % Each row of X is a subject and each column a condition.  cond holds the
  % condition labels (default 'time1', 'time2', ...).  p is the probability
  % of the observed F statistic under equal condition means, table is the
  % ANOVA table as a cell array (default) or a formatted string, and st is
  % a struct laid out like the anova1 statistics struct.
  %
  % NaNs are skipped by the nan* reductions.  Note that the sums of squares
  % are still weighted by the full row/column counts N and J, so results
  % with missing cells are approximate.

  switch nargin
    case 0
      error('Too few inputs.');
    case 1
      X = varargin{1};
      for c = 1:size(X, 2)
        condition{c} = ['time', num2str(c)];
      end
      option = 'cell';
    case 2
      X = varargin{1};
      condition = varargin{2};
      option = 'cell';
    case 3
      X = varargin{1};
      condition = varargin{2};
      option = varargin{3};
    otherwise
      error('Too many inputs.');
  end

  % Find the means of the subjects and measures, ignoring any NaNs
  u_subjects = nanmean(X,2);
  u_measures = nanmean(X,1);
  % Grand mean over the observed cells only.  Dividing the NaN-ignoring sum
  % by the total number of cells would bias it toward zero when data are
  % missing.
  u_grand = nanmean(X(:));

  % Differences between rows will be reflected in SS subjects, differences
  % between columns will be reflected in SS_within subjects.
  N = size(X,1); % number of subjects
  J = size(X,2); % number of samples per subject
  SS_measures = N * nansum((u_measures - u_grand).^2);
  SS_subjects = J * nansum((u_subjects - u_grand).^2);
  SS_total = nansum(nansum((X - u_grand).^2));
  SS_error = SS_total - SS_measures - SS_subjects;

  df_measures = J - 1;
  df_subjects = N - 1;
  df_grand = (N*J) - 1;
  df_error = df_grand - df_measures - df_subjects;

  MS_measures = SS_measures / df_measures;
  MS_subjects = SS_subjects / df_subjects;
  MS_error = SS_error / df_error; % variation expected as a result of sampling error alone

  F = MS_measures / MS_error;
  p = 1 - fcdf(F, df_measures, df_error); % Probability of F given equal means.

  if strcmp(option, 'string')
    table = [sprintf('\nSource\tSS\tdf\tMS\tF\tProb > F'), ...
             sprintf('\nSubject\t%g\t%i\t%g', SS_subjects, df_subjects, MS_subjects), ...
             sprintf('\nMeasure\t%g\t%i\t%g\t%g\t%g', SS_measures, df_measures, MS_measures, F, p), ...
             sprintf('\nError\t%g\t%i\t%g', SS_error, df_error, MS_error), ...
             sprintf('\n')];
  else
    table = {'Source', 'Partial SS', 'df', 'MS', 'F', 'Prob > F'; ...
             'Subject', SS_subjects, df_subjects, MS_subjects, '', ''; ...
             'Measure', SS_measures, df_measures, MS_measures, F, p};
  end

  st.gnames = condition'; % this is the same struct format used in anova1
  st.n = repmat(N, 1, J);
  st.source = 'anova1'; % it cannot be assumed that 'repanova' is a supported source for multcompare
  st.means = u_measures;
  st.df = df_error;
  st.s = sqrt(MS_error);
end

% This function was created with guidance from the following websites:
% http://courses.washington.edu/stat217/rmANOVA.html
% http://grants.hhp.coe.uh.edu/doconnor/PEP6305/Topic%20010%20Repeated%20Measures.htm
function [h, p, stats] = runstest (x, x2)

  ## Runs test for serial correlation in the vector X.
  ##
  ## X2 is the reference value subtracted from X before counting runs; when
  ## it is omitted, empty or non-numeric, the median of the non-missing
  ## values of X is used.
  ##
  ## H is true when the two-sided p-value P is below 0.05.  STATS has the
  ## fields nruns (number of runs), n1 and n0 (counts of positive and
  ## negative values of x - v) and z (the test statistic).  P is based on
  ## the normal approximation.

  # Check arguments
  if (nargin < 1)
    print_usage;
  endif

  ## Remove missing values before the reference value is chosen: a median
  ## taken over data that still contains NaN is itself NaN, which would make
  ## every sign NaN and the test meaningless.
  x = x(~isnan (x));

  if (nargin > 1 && isnumeric (x2) && ! isempty (x2))
    v = x2;
  else
    v = median (x);
  endif

  x = sign (x - v);
  x = x(x ~= 0);   # values equal to the reference carry no sign information

  ## A new run starts wherever two neighbouring signs differ.
  R = sum ((x(1:(end-1)) .* x(2:end)) < 0) + 1;   # number of runs

  ## Expected number of runs for an iid sequence
  n1 = sum (x > 0);
  n2 = sum (x < 0);
  R_bar = 1 + 2*n1*n2/(n1 + n2);

  ## Standard deviation of the number of runs for an iid sequence
  s_R = sqrt (2*n1*n2*(2*n1*n2 - n1 - n2)/((n1 + n2)^2 * (n1 + n2 - 1)));

  ## Desired significance level
  alpha = 0.05;

  Z = (R - R_bar) / s_R;   # test statistic
  p = 2 * normcdf (-abs (Z));
  h = p < alpha;

  if (nargout > 2)
    stats.nruns = R;
    stats.n1 = n1;
    stats.n0 = n2;
    stats.z = Z;
  endif

endfunction
data = [-213 -564 -35 -15 141 115 -420 -360 203 -338 -431 194 -220 -513 154 -125 -559 92 -21 -579 -52 99 -543 -175 162 -457 -346 204 -300 -474 164 -107 -572 -8 83 -541 -224 180 -420 -374 201 -236 -531 83 27 -564 -112 131 -507 -254 199 -311 -495 143 -46 -579 -90 136 -472 -338 202 -287 -477 169 -124 -568 17 48 -568 -135 162 -430 -422 172 -74 -577 -13 92 -534 -243 194 -355 -465 156 -81 -578 -64 139 -449 -384 193 -198 -538 110 -44 -577 -6 66 -552 -164 161 -460 -344 205 -281 -504 134 -28 -576 -118 156 -437 -381 200 -220 -540 83 11 -568 -160 172 -414 -408 188 -125 -572 -32 139 -492 -321 205 -262 -504 142 -83 -574 0 48 -571 -106 137 -501 -266 190 -391 -406 194 -186 -553 83 -13 -577 -49 103 -515 -280 201 300 -506 131 -45 -578 -80 138 -462 -361 201 -211 -554 32 74 -533 -235 187 -372 -442 182 -147 -566 25 68 -535 -244 194 -351 -463 174 -125 -570 15 72 -550 -190 172 -424 -385 198 -218 -536 96]; #NIST beam deflection data, http://www.itl.nist.gov/div898/handbook/eda/section4/eda425.htm %! [h, p, stats] = runstest (data); %! expected_h = true; %! expected_p = 0.0070646; %! expected_z = 2.6938; %! assert (h, expected_h); %! assert (p, expected_p, 1E-6); %! assert (stats.z, expected_z, 1E-4); statistics-1.3.0/inst/signtest.m0000755000000000000000000001236212776476211015052 0ustar 00000000000000## Copyright (C) 2014 Tony Richardson ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . 
## -*- texinfo -*-
## @deftypefn {Function File} {[@var{pval}, @var{h}, @var{stats}] =} signtest (@var{x})
## @deftypefnx {Function File} {[@var{pval}, @var{h}, @var{stats}] =} signtest (@var{x}, @var{m})
## @deftypefnx {Function File} {[@var{pval}, @var{h}, @var{stats}] =} signtest (@var{x}, @var{y})
## @deftypefnx {Function File} {[@var{pval}, @var{h}, @var{stats}] =} signtest (@var{x}, @var{y}, @var{Name}, @var{Value})
## Test for median.
##
## Perform a sign test of the null hypothesis that @var{x} is from a
## distribution that has a zero median.
##
## If the second argument @var{m} is a scalar, the null hypothesis is that
## @var{x} has median @var{m}.
##
## If the second argument @var{y} is a vector, the null hypothesis is that
## the distribution of @code{@var{x} - @var{y}} has zero median.
##
## The argument @qcode{"alpha"} can be used to specify the significance level
## of the test (the default value is 0.05).  The string
## argument @qcode{"tail"} can be used to select the desired alternative
## hypothesis.  If @qcode{"tail"} is @qcode{"both"} (default) the null is
## tested against the two-sided alternative @code{median (@var{x}) != @var{m}}.
## If @qcode{"tail"} is @qcode{"right"} the one-sided
## alternative @code{median (@var{x}) > @var{m}} is considered.
## Similarly for @qcode{"left"}, the one-sided alternative @code{median
## (@var{x}) < @var{m}} is considered.  When @qcode{"method"} is @qcode{"exact"}
## the p-value is computed using an exact method (this is the default).  When
## @qcode{"method"} is @qcode{"approximate"} a normal approximation is used for the
## test statistic.
##
## The p-value of the test is returned in @var{pval}.  If @var{h} is 0 the
## null hypothesis is accepted, if it is 1 the null hypothesis is rejected.
## @var{stats} is a structure containing the value of the test statistic
## (@var{sign}) and the value of the z statistic (@var{zval}) (only computed
## when @qcode{"method"} is @qcode{"approximate"}).
##
## @end deftypefn

## Author: Tony Richardson

function [p, h, stats] = signtest(x, my, varargin)
  % Sign test for the median of x (or of x - my when my is not a scalar).
  %
  % Inputs:
  %   x        data
  %   my       hypothesised median (scalar) or paired sample; a missing or
  %            empty value falls back to 0
  %   varargin Name/Value pairs: 'alpha', 'tail', 'method', 'dim'
  %            (names are matched case-insensitively)
  % Outputs:
  %   p        p-value of the test
  %   h        double(p < alpha)
  %   stats    struct with fields zval and sign
  %
  % NOTE(review): several comparison expressions below are visibly
  % truncated in this copy (e.g. "sum(xmy)"); text between a '<' and a
  % later '>' appears to have been lost, taking whole case branches with
  % it.  The code is reproduced as found and must be restored from the
  % upstream signtest.m before use.

  % Defaults for the optional settings
  my_default = 0;
  alpha = 0.05;
  tail = 'both';
  method = 'exact';

  % Find the first non-singleton dimension of x
  dim = min(find(size(x)~=1));
  if isempty(dim), dim = 1; end

  if (nargin == 1)
    my = my_default;
  end

  % Walk the Name/Value pairs; each recognised name consumes the next
  % element of varargin as its value.
  i = 1;
  while ( i <= length(varargin) )
    switch lower(varargin{i})
      case 'alpha'
        i = i + 1;
        alpha = varargin{i};
      case 'tail'
        i = i + 1;
        tail = varargin{i};
      case 'method'
        i = i + 1;
        method = varargin{i};
      case 'dim'
        i = i + 1;
        dim = varargin{i};
      otherwise
        % NOTE(review): the trailing [] is presumably there to force
        % format-template processing of the message; confirm.
        error('Invalid Name argument.',[]);
    end
    i = i + 1;
  end

  if ~isa(tail, 'char')
    error('tail argument to signtest must be a string\n',[]);
  end

  if ~isa(method, 'char')
    error('method argument to signtest must be a string\n',[]);
  end

  % Set default values if arguments are present but empty
  if isempty(my)
    my = my_default;
  end

  % This adjustment allows everything else to remain the
  % same for both the one-sample t test and paired tests.
  % If second argument is a vector
  if ~isscalar(my)
    x = x - my;
    my = my_default;
  end

  % Number of observations along the working dimension
  n = size(x, dim);

  switch lower(method)
    case 'exact'
      % The z statistic is not computed for the exact method
      stats.zval = nan;
      switch lower(tail)
        case 'both'
          % NOTE(review): truncated expression; see the note at the top
          w = min(sum(xmy));
          pl = binocdf(w, n, 0.5);
          p = 2*min(pl,1-pl);
        case 'left'
          % NOTE(review): truncated expression; a 'right' branch seems to
          % have been swallowed here
          w = sum(xmy);
          p = 1 - binocdf(w, n, 0.5);
        otherwise
          error('Invalid tail argument to signtest\n',[]);
      end
    case 'approximate'
      switch lower(tail)
        case 'both'
          npos = sum(x>my);
          % NOTE(review): the next three statements are residue of the
          % truncation; as written, w is never assigned in this branch and
          % the 'left'/'right' branches are missing
          nneg = sum(xmy);
          nneg = sum(xmy);
          nneg = sum(xmy);
          % Normal approximation with a 0.5 continuity correction
          stats.zval = (w - 0.5*n - 0.5*sign(npos-nneg))/sqrt(0.25*n);
          p = 1-normcdf(stats.zval);
        otherwise
          error('Invalid tail argument to signtest\n',[]);
      end
    otherwise
      error('Invalid method argument to signtest\n',[]);
  end

  stats.sign = w;

  % Reject the null hypothesis when p falls below the significance level
  h = double(p < alpha);
end
function y = squareform (x, method)

  ## Convert between a distance vector and a hollow, symmetric distance
  ## matrix.
  ##
  ## With METHOD "tomatrix", X is read as the strictly lower triangular part
  ## (column by column) of an n-by-n matrix, so numel (X) must equal
  ## n*(n-1)/2; the result is that matrix mirrored across the diagonal.
  ## With METHOD "tovector", X must be square with an all-zero diagonal and
  ## its strictly lower triangle is returned as a row vector; a warning is
  ## issued when X is not symmetric.  Without METHOD, vectors are converted
  ## to matrices and everything else to vectors.  The class of X is kept.

  if (nargin < 1 || nargin > 2)
    print_usage ();
  endif

  if (! (isnumeric (x) && ismatrix (x)))
    error ("squareform: Y or Z must be a numeric matrix or vector");
  endif

  if (nargin < 2)
    ## This is ambiguous when numel (x) == 1, which is the reason the
    ## METHOD argument exists.
    if (isvector (x))
      method = "tomatrix";
    else
      method = "tovector";
    endif
  endif

  switch (tolower (method))
    case "tomatrix"
      ## Solve numel (x) = n * (n - 1) / 2 for the matrix order n.
      n = (1 + sqrt (1 + 8 * numel (x))) / 2;
      if (n != fix (n))
        error ("squareform: the numel of Y cannot form a square matrix");
      endif

      y = zeros (n, n, class (x));
      below = tril (true (n), -1);
      y(below) = x;        # fill the lower triangular part
      y = y + y.';         # and mirror it into the upper part

    case "tovector"
      if (! issquare (x))
        error ("squareform: Z is not a square matrix");
      endif
      if (any (diag (x) != 0))
        error ("squareform: Z is not a hollow matrix, i.e., with diagonal entries all zero");
      endif
      if (! issymmetric (x))
        warning ("squareform:symmetric", "squareform: Z is not a symmetric matrix");
      endif

      packed = tril (x, -1, "pack");
      y = reshape (packed, 1, []);

    otherwise
      error ("squareform: invalid METHOD '%s'", method);
  endswitch

endfunction
endfor statistics-1.3.0/inst/stepwisefit.m0000755000000000000000000001127212776476211015557 0ustar 00000000000000## Copyright (C) 2013-2014 Nir Krakauer ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {@var{X_use}, @var{b}, @var{bint}, @var{r}, @var{rint}, @var{stats} =} stepwisefit (@var{y}, @var{X}, @var{penter} = 0.05, @var{premove} = 0.1) ## Linear regression with stepwise variable selection. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{y} is an @var{n} by 1 vector of data to fit. ## @item ## @var{X} is an @var{n} by @var{k} matrix containing the values of @var{k} potential predictors. No constant term should be included (one will always be added to the regression automatically). ## @item ## @var{penter} is the maximum p-value to enter a new variable into the regression (default: 0.05). ## @item ## @var{premove} is the minimum p-value to remove a variable from the regression (default: 0.1). ## @end itemize ## ## @subheading Return values ## ## @itemize @bullet ## @item ## @var{X_use} contains the indices of the predictors included in the final regression model. The predictors are listed in the order they were added, so typically the first ones listed are the most significant. 
function [X_use, b, bint, r, rint, stats] = stepwisefit(y, X, penter = 0.05, premove = 0.1)

  ## Forward/backward stepwise selection of the columns of X for a linear
  ## regression of y.  An intercept column is always included.
  ##
  ## Each pass (1) tries to add the unused predictor most correlated with
  ## the current residual and (2) tries to drop the weakest predictor
  ## currently in the model.  The loop ends when a pass changes nothing or
  ## after max_iters passes.
  ##
  ## The returned b, bint, r, rint, stats always describe
  ##   regress (y, [ones(n, 1) X(:, X_use)], penter)
  ## including the case where X_use ends up empty (intercept-only model).

  #remove any rows with missing entries
  notnans = !any (isnan ([y X]) , 2);
  y = y(notnans);
  X = X(notnans,:);

  n = numel(y); #number of data points
  k = size(X, 2); #number of predictors

  X_use = [];
  v = 0; #number of predictor variables in regression model
  have_fit = false; #becomes true once b, bint, r, rint, stats hold a regress result

  iter = 0;
  max_iters = 100; #maximum number of iterations to do

  r = y;
  while 1

    iter++;

    #decide which variable to add to regression, if any
    added = false;
    if numel(X_use) < k
      X_inds = false (k, 1);
      X_inds(X_use) = 1;
      #try adding the variable with the highest correlation to the residual from current regression
      [~, i_max_corr] = max(abs(corr(X(:, ~X_inds), r)));
      i_max_corr = (1:k)(~X_inds)(i_max_corr); #index within the original predictor set
      [b_new, bint_new, r_new, rint_new, stats_new] = regress(y, [ones(n, 1) X(:, [X_use i_max_corr])], penter);
      #ratio of the new coefficient to the half-width of its confidence interval
      z_new = abs(b_new(end)) / (bint_new(end, 2) - b_new(end));
      if z_new > 1 #accept new variable
        added = true;
        X_use = [X_use i_max_corr];
        b = b_new;
        bint = bint_new;
        r = r_new;
        rint = rint_new;
        stats = stats_new;
        v = v + 1;
        have_fit = true;
      endif
    endif

    #decide which variable to drop from regression, if any
    dropped = false;
    if v > 0
      #estimate the ratio between the z score corresponding to premove to that corresponding to penter
      t_ratio = tinv(1 - premove/2, n - v - 1) / tinv(1 - penter/2, n - v - 1);
      [z_min, i_min] = min(abs(b(2:end)) ./ (bint(2:end, 2) - b(2:end)));
      if z_min < t_ratio #drop a variable
        dropped = true;
        X_use(i_min) = [];
        [b, bint, r, rint, stats] = regress(y, [ones(n, 1) X(:, X_use)], penter);
        v = v - 1;
      endif
    endif

    #terminate if no change in the list of regression variables
    if ~added && ~dropped
      break
    endif

    if iter >= max_iters
      warning('stepwisefit: maximum iteration count exceeded before convergence')
      break
    endif

  endwhile

  #no predictor was ever accepted: report the intercept-only fit so that
  #every declared output is defined
  if ! have_fit
    [b, bint, r, rint, stats] = regress(y, [ones(n, 1) X(:, X_use)], penter);
  endif

endfunction
## The first column of @var{table} is the number of bin, the second ## is the number of instances in each class (absolute frequency). The ## third column contains the percentage of each value (relative ## frequency) and the fourth column contains the cumulative frequency. ## ## If @var{edges} is missed the width of each class is unitary, if @var{edges} ## is a scalar then represent the number of classes, or you can define the ## width of each bin. ## @var{table}(@var{k}, 2) will count the value @var{data} (@var{i}) if ## @var{edges} (@var{k}) <= @var{data} (@var{i}) < @var{edges} (@var{k}+1). ## The last bin will count the value of @var{data} (@var{i}) if ## @var{edges}(@var{k}) <= @var{data} (@var{i}) <= @var{edges} (@var{k}+1). ## Values outside the values in @var{edges} are not counted. Use -inf and inf ## in @var{edges} to include all values. ## Tabulate with no output arguments returns a formatted table in the ## command window. ## ## Example ## ## @example ## sphere_radius = [1:0.05:2.5]; ## tabulate (sphere_radius) ## @end example ## ## Tabulate returns 2 bins, the first contains the sphere with radius ## between 1 and 2 mm excluded, and the second one contains the sphere with ## radius between 2 and 3 mm. ## ## @example ## tabulate (sphere_radius, 10) ## @end example ## ## Tabulate returns ten bins. ## ## @example ## tabulate (sphere_radius, [1, 1.5, 2, 2.5]) ## @end example ## ## Tabulate returns three bins, the first contains the sphere with radius ## between 1 and 1.5 mm excluded, the second one contains the sphere with ## radius between 1.5 and 2 mm excluded, and the third contains the sphere with ## radius between 2 and 2.5 mm. ## ## @example ## bar (table (:, 1), table (:, 2)) ## @end example ## ## draw histogram. 
function table = tabulate (varargin)

  ## Build a frequency table for a data vector.  Columns of the result:
  ## lower bin edge, absolute count, percentage of all data, running count.
  ## Bins are closed on the left and open on the right, except the last
  ## one which is closed on both sides.

  if (nargin < 1 || nargin > 2)
    print_usage;
  endif

  data = varargin{1};
  if (! isvector (data))
    error ("data must be a vector.");
  endif

  n = length (data);
  lowest = min (data);
  highest = max (data);

  if (nargin == 1)
    ## default: unit-width bins starting at 1
    edges = 1:1:highest+1;
  else
    edges = varargin{2};
  endif

  if (isscalar (edges))
    ## a scalar is the number of equal-width classes between min and max
    step = (highest - lowest) / edges;
    edges = [lowest:step:highest];
  endif

  ## number of classes
  nbins = length (edges) - 1;

  ## frequency table, filled bin by bin while accumulating the running count
  freqtable = zeros (nbins, 4);
  running = 0;
  for k = 1:nbins
    above_lower = (data >= edges(k));
    if (k == nbins)
      in_bin = above_lower & (data <= edges(k+1));
    else
      in_bin = above_lower & (data < edges(k+1));
    endif
    hits = nnz (in_bin);
    running = running + hits;
    freqtable(k,2) = hits;
    freqtable(k,4) = running;
  endfor

  freqtable(:,1) = edges(1:end-1)(:);
  freqtable(:,3) = 100*freqtable(:,2)/n;

  if (nargout == 0)
    disp(" bin Fa Fr% Fc");
    printf("%8g %5d %6.2f%% %5d\n",freqtable');
  else
    table = freqtable;
  endif

endfunction
function [data, varnames, casenames] = tblread (f="", d=" ")

  ## Read a delimited text table: the first row holds the variable names,
  ## the first column the case names, everything else is numeric data.

  ## Check arguments
  if (nargin < 1 || nargin > 2)
    print_usage ();
  endif

  if (isempty (f))
    ## FIXME: open a file dialog box in this case when a file dialog box
    ## becomes available
    error ("tblread: filename must be given")
  endif

  ## translate a delimiter name such as "tab" into the character itself
  [d err] = tbl_delim (d);
  if (! isempty (err))
    error ("tblread: %s", err)
  endif

  ## from here on D holds the parsed cell array, not the delimiter
  d = csv2cell (f, d);

  header_row = d(1, 2:end);
  first_col  = d(2:end, 1);
  body       = d(2:end, 2:end);

  data = cell2mat (body);
  varnames = strvcat (header_row);
  casenames = strvcat (first_col);

endfunction
[dt vt ct] = tblread ("tblread-tab.dat", "\t"); %! assert (dt, d); %! assert (vt, v); %! assert (ct, c); %!test %! [dt vt ct] = tblread ("tblread-tab.dat", '\t'); %! assert (dt, d); %! assert (vt, v); %! assert (ct, c); statistics-1.3.0/inst/tblwrite.m0000755000000000000000000000774112776476211015053 0ustar 00000000000000## Copyright (C) 2008 Bill Denney ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {} tblwrite (@var{data}, @var{varnames}, @var{casenames}, @var{filename}) ## @deftypefnx {Function File} {} tblwrite (@var{data}, @var{varnames}, @var{casenames}, @var{filename}, @var{delimeter}) ## Write tabular data to an ascii file. ## ## @var{data} is written to an ascii data file named @var{filename} with ## an optional @var{delimeter}. The delimeter may be any single ## character or ## @itemize ## @item "space" " " (default) ## @item "tab" "\t" ## @item "comma" "," ## @item "semi" ";" ## @item "bar" "|" ## @end itemize ## ## The @var{data} is written starting at cell (2,2) where the ## @var{varnames} are a char matrix or cell vector written to the first ## row (starting at (1,2)), and the @var{casenames} are a char matrix ## (or cell vector) written to the first column (starting at (2,1)). 
function tblwrite (data, varnames, casenames, f="", d=" ")

  ## Write DATA as a delimited text table to file F.  VARNAMES go into the
  ## first row (starting at column 2) and CASENAMES into the first column
  ## (starting at row 2).  Both may be char matrices (one name per row) or
  ## cell vectors.  D is a delimiter character or one of the names accepted
  ## by tbl_delim.

  ## Check arguments
  if nargin < 4 || nargin > 5
    print_usage ();
  endif

  varnames = __makecell__ (varnames, "varnames");
  casenames = __makecell__ (casenames, "casenames");

  ## one variable name per data column, one case name per data row
  if numel (varnames) != columns (data)
    error ("tblwrite: the number of rows (or cells) in varnames must equal the number of columns in data")
  endif
  if numel (casenames) != rows (data)
    error ("tblwrite: the number of rows (or cells) in casenames must equal the number of rows in data")
  endif

  if isempty (f)
    ## FIXME: open a file dialog box in this case when a file dialog box
    ## becomes available
    error ("tblwrite: filename must be given")
  endif

  [d err] = tbl_delim (d);
  if ! isempty (err)
    error ("tblwrite: %s", err)
  endif

  ## assemble the full table as a cell array; cell (1,1) stays empty
  dat = cell (size (data) + 1);
  dat(1,2:end) = varnames;
  dat(2:end,1) = casenames;
  dat(2:end,2:end) = mat2cell (data,
                               ones (rows (data), 1),
                               ones (columns (data), 1));
  cell2csv (f, dat, d);

endfunction

function x = __makecell__ (x, name)
  ## force x into a cell matrix; NAME is only used in the error message
  if ! iscell (x)
    if ischar (x)
      ## one cell per row of the char matrix (padding is kept)
      x = mat2cell (x, ones (rows (x), 1));
    else
      error ("tblwrite: %s must be either a char or a cell", name)
    endif
  endif
endfunction
function cdf = tricdf (x, a, b, c)

  ## CDF of the triangular distribution supported on [A, B] whose two
  ## linear density branches meet at C (A <= C <= B, A < B).
  ##
  ##   x <= a      : 0
  ##   a <  x <  c : (x-a)^2 / ((b-a) (c-a))
  ##   c <= x <  b : 1 - (b-x)^2 / ((b-a) (b-c))
  ##   x >= b      : 1
  ##
  ## NaN is returned where X is NaN or the parameters are invalid.  The
  ## result is single if any input is single.

  if (nargin != 4)
    print_usage ();
  endif

  ## Bring everything to one common size so a single vectorised code path
  ## serves both scalar and array parameters.
  [retval, x, a, b, c] = common_size (x, a, b, c);
  if (retval > 0)
    error ("tricdf: X, A, B, and C must be of common size or scalars");
  endif

  if (iscomplex (x) || iscomplex (a) || iscomplex (b) || iscomplex (c))
    error ("tricdf: X, A, B, and C must not be complex");
  endif

  if (isa (x, "single") || isa (a, "single")
      || isa (b, "single") || isa (c, "single"))
    cdf = zeros (size (x), "single");
  else
    cdf = zeros (size (x));
  endif

  valid = ! isnan (x) & (a < b) & (a <= c) & (c <= b);
  cdf(! valid) = NaN;

  width = b - a;

  ## rising branch; a < x < c implies c > a, so no division by zero
  k = valid & (a < x) & (x < c);
  cdf(k) = (x(k) - a(k)).^2 ./ (width(k) .* (c(k) - a(k)));

  ## falling branch; c <= x < b implies b > c
  k = valid & (c <= x) & (x < b);
  cdf(k) = 1 - (b(k) - x(k)).^2 ./ (width(k) .* (b(k) - c(k)));

  ## at or beyond the upper end of the support
  k = valid & (b <= x);
  cdf(k) = 1;

endfunction
y = [0, 0, 0.02, 0.5, 0.98, 1 1]; %!assert (tricdf (x, ones (1,7), 2*ones (1,7), 1.5*ones (1,7)), y, eps) %!assert (tricdf (x, 1*ones (1,7), 2, 1.5), y, eps) %!assert (tricdf (x, 1, 2*ones (1,7), 1.5), y, eps) %!assert (tricdf (x, 1, 2, 1.5*ones (1,7)), y, eps) %!assert (tricdf (x, 1, 2, 1.5), y, eps) %!assert (tricdf (x, [1, 1, NaN, 1, 1, 1, 1], 2, 1.5), [y(1:2), NaN, y(4:7)], eps) %!assert (tricdf (x, 1, 2*[1, 1, NaN, 1, 1, 1, 1], 1.5), [y(1:2), NaN, y(4:7)], eps) %!assert (tricdf (x, 1, 2, 1.5*[1, 1, NaN, 1, 1, 1, 1]), [y(1:2), NaN, y(4:7)], eps) %!assert (tricdf ([x, NaN], 1, 2, 1.5), [y, NaN], eps) ## Test class of input preserved %!assert (tricdf (single ([x, NaN]), 1, 2, 1.5), single ([y, NaN]), eps('single')) %!assert (tricdf ([x, NaN], single (1), 2, 1.5), single ([y, NaN]), eps('single')) %!assert (tricdf ([x, NaN], 1, single (2), 1.5), single ([y, NaN]), eps('single')) %!assert (tricdf ([x, NaN], 1, 2, single (1.5)), single ([y, NaN]), eps('single')) ## Test input validation %!error tricdf () %!error tricdf (1) %!error tricdf (1,2) %!error tricdf (1,2,3) %!error tricdf (1,2,3,4,5) %!error tricdf (1, ones (3), ones (2), ones (2)) %!error tricdf (1, ones (2), ones (3), ones (2)) %!error tricdf (1, ones (2), ones (2), ones (3)) %!error tricdf (i, 2, 2, 2) %!error tricdf (2, i, 2, 2) %!error tricdf (2, 2, i, 2) %!error tricdf (2, 2, 2, i) statistics-1.3.0/inst/triinv.m0000644000000000000000000000751112776476211014522 0ustar 00000000000000## Copyright (C) 2016 Dag Lyberg ## Copyright (C) 1995-2015 Kurt Hornik ## ## This file is part of Octave. ## ## Octave is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or (at ## your option) any later version. 
function inv = triinv (x, a, b, c)

  ## Quantile function of the triangular distribution supported on [A, B]
  ## whose two linear density branches meet at C (A <= C <= B, A < B).
  ##
  ## With p0 = (c-a)/(b-a), the probability mass to the left of C:
  ##   0  <= x <= p0 : a + sqrt (x (b-a) (c-a))
  ##   p0 <  x <= 1  : b - sqrt ((1-x) (b-a) (b-c))
  ##
  ## NaN is returned where X is outside [0, 1] (or NaN) or the parameters
  ## are invalid.  The result is single if any input is single.

  if (nargin != 4)
    print_usage ();
  endif

  ## Bring everything to one common size so a single vectorised code path
  ## serves both scalar and array parameters.
  [retval, x, a, b, c] = common_size (x, a, b, c);
  if (retval > 0)
    error ("triinv: X, A, B, and C must be of common size or scalars");
  endif

  if (iscomplex (x) || iscomplex (a) || iscomplex (b) || iscomplex (c))
    error ("triinv: X, A, B, and C must not be complex");
  endif

  if (isa (x, "single") || isa (a, "single")
      || isa (b, "single") || isa (c, "single"))
    inv = NaN (size (x), "single");
  else
    inv = NaN (size (x));
  endif

  valid = (x >= 0) & (x <= 1) & (a < b) & (a <= c) & (c <= b);

  width = b - a;
  p_left = (c - a) ./ width;     # CDF value at x == c

  ## lower branch
  j = valid & (x <= p_left);
  inv(j) = a(j) + sqrt (x(j) .* width(j) .* (c(j) - a(j)));

  ## upper branch; x == 1 yields exactly b
  j = valid & (x > p_left);
  inv(j) = b(j) - sqrt ((1 - x(j)) .* width(j) .* (b(j) - c(j)));

endfunction
y = [NaN, 0, 0.1, 0.5, 0.9, 1, NaN] + 1; %!assert (triinv (x, ones (1,7), 2*ones (1,7), 1.5*ones (1,7)), y, eps) %!assert (triinv (x, 1*ones (1,7), 2, 1.5), y, eps) %!assert (triinv (x, 1, 2*ones (1,7), 1.5), y, eps) %!assert (triinv (x, 1, 2, 1.5*ones (1,7)), y, eps) %!assert (triinv (x, 1, 2, 1.5), y, eps) %!assert (triinv (x, [1, 1, NaN, 1, 1, 1, 1], 2, 1.5), [y(1:2), NaN, y(4:7)], eps) %!assert (triinv (x, 1, 2*[1, 1, NaN, 1, 1, 1, 1], 1.5), [y(1:2), NaN, y(4:7)], eps) %!assert (triinv (x, 1, 2, 1.5*[1, 1, NaN, 1, 1, 1, 1]), [y(1:2), NaN, y(4:7)], eps) %!assert (triinv ([x, NaN], 1, 2, 1.5), [y, NaN], eps) ## Test class of input preserved %!assert (triinv (single ([x, NaN]), 1, 2, 1.5), single ([y, NaN]), eps('single')) %!assert (triinv ([x, NaN], single (1), 2, 1.5), single ([y, NaN]), eps('single')) %!assert (triinv ([x, NaN], 1, single (2), 1.5), single ([y, NaN]), eps('single')) %!assert (triinv ([x, NaN], 1, 2, single (1.5)), single ([y, NaN]), eps('single')) ## Test input validation %!error triinv () %!error triinv (1) %!error triinv (1,2) %!error triinv (1,2,3) %!error triinv (1,2,3,4,5) %!error triinv (1, ones (3), ones (2), ones (2)) %!error triinv (1, ones (2), ones (3), ones (2)) %!error triinv (1, ones (2), ones (2), ones (3)) %!error triinv (i, 2, 2, 2) %!error triinv (2, i, 2, 2) %!error triinv (2, 2, i, 2) %!error triinv (2, 2, 2, i) statistics-1.3.0/inst/trimmean.m0000755000000000000000000000317612776476211015031 0ustar 00000000000000## Copyright (C) 2001 Paul Kienzle ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. 
function a = trimmean(x, p, varargin)

  ## Mean of X along one dimension after discarding round (n*p/100)
  ## elements from each end of the sorted data, n being the length of that
  ## dimension.  The optional third argument selects the dimension; by
  ## default the first non-singleton dimension is used.

  if (nargin != 2 && nargin != 3)
    print_usage;
  endif

  sz = size (x);
  if (nargin < 3)
    dim = find (sz > 1, 1);
    if (isempty (dim))
      dim = 1;
    endif
  else
    dim = varargin{1};
  endif

  ## Always pass DIM on: after trimming, the working dimension may have
  ## length 1, and a dimension-less mean would then silently average along
  ## a different dimension.
  y = sort (x, dim);

  n = size (x, dim);              # 1 when dim exceeds ndims (x)
  trim = round (n*p*0.01);

  idx = cell (1, max (numel (sz), dim));
  for i = 1:numel (idx)
    idx{i} = 1:size (x, i);
  endfor
  idx{dim} = 1+trim : n-trim;

  a = mean (y(idx{:}), dim);

endfunction
function pdf = tripdf (x, a, b, c)

  ## Density of the triangular distribution on [A, B]: it rises linearly
  ## from 0 at A to 2/(B-A) at C and falls linearly back to 0 at B.
  ## NaN is returned where X is NaN or the parameters are invalid.

  if (nargin != 4)
    print_usage ();
  endif

  scalar_params = isscalar (a) && isscalar (b) && isscalar (c);
  if (! scalar_params)
    [retval, x, a, b, c] = common_size (x, a, b, c);
    if (retval > 0)
      error ("tripdf: X, A, B, and C must be of common size or scalars");
    endif
  endif

  if (iscomplex (x) || iscomplex (a) || iscomplex (b) || iscomplex (c))
    error ("tripdf: X, A, B, and C must not be complex");
  endif

  want_single = (isa (x, "single") || isa (a, "single")
                 || isa (b, "single") || isa (c, "single"));
  if (want_single)
    pdf = zeros (size (x), "single");
  else
    pdf = zeros (size (x));
  endif

  ## undefined wherever the argument is NaN or the parameters are invalid
  undefined = isnan (x) | !(a < b) | !(c >= a) | !(c <= b) ;
  pdf(undefined) = NaN;

  ## the three parts of the support; outside of it the density stays 0
  inside  = (x >= a) & (x <= b) & (a < b) & (a <= c) & (c <= b);
  rising  = inside & (a <= x) & (x < c);
  at_peak = inside & (x == c);
  falling = inside & (c < x) & (x <= b);

  h = 2 ./ (b-a);     # density at the peak

  if (scalar_params)
    pdf(rising)  = h * (x(rising)-a) / (c-a);
    pdf(at_peak) = h;
    pdf(falling) = h * (b-x(falling)) / (b-c);
  else
    pdf(rising)  = h(rising) .* (x(rising)-a(rising)) ./ (c(rising)-a(rising));
    pdf(at_peak) = h(at_peak);
    pdf(falling) = h(falling) .* (b(falling)-x(falling)) ./ (b(falling)-c(falling));
  endif

endfunction
deps = 2*eps; %!assert (tripdf (x, ones (1,7), 2*ones (1,7), 1.5*ones (1,7)), y, deps) %!assert (tripdf (x, 1*ones (1,7), 2, 1.5), y, deps) %!assert (tripdf (x, 1, 2*ones (1,7), 1.5), y, deps) %!assert (tripdf (x, 1, 2, 1.5*ones (1,7)), y, deps) %!assert (tripdf (x, 1, 2, 1.5), y, deps) %!assert (tripdf (x, [1, 1, NaN, 1, 1, 1, 1], 2, 1.5), [y(1:2), NaN, y(4:7)], deps) %!assert (tripdf (x, 1, 2*[1, 1, NaN, 1, 1, 1, 1], 1.5), [y(1:2), NaN, y(4:7)], deps) %!assert (tripdf (x, 1, 2, 1.5*[1, 1, NaN, 1, 1, 1, 1]), [y(1:2), NaN, y(4:7)], deps) %!assert (tripdf ([x, NaN], 1, 2, 1.5), [y, NaN], deps) ## Test class of input preserved %!assert (tripdf (single ([x, NaN]), 1, 2, 1.5), single ([y, NaN]), eps('single')) %!assert (tripdf ([x, NaN], single (1), 2, 1.5), single ([y, NaN]), eps('single')) %!assert (tripdf ([x, NaN], 1, single (2), 1.5), single ([y, NaN]), eps('single')) %!assert (tripdf ([x, NaN], 1, 2, single (1.5)), single ([y, NaN]), eps('single')) ## Test input validation %!error tripdf () %!error tripdf (1) %!error tripdf (1,2) %!error tripdf (1,2,3) %!error tripdf (1,2,3,4,5) %!error tripdf (1, ones (3), ones (2), ones (2)) %!error tripdf (1, ones (2), ones (3), ones (2)) %!error tripdf (1, ones (2), ones (2), ones (3)) %!error tripdf (i, 2, 2, 2) %!error tripdf (2, i, 2, 2) %!error tripdf (2, 2, i, 2) %!error tripdf (2, 2, 2, i) statistics-1.3.0/inst/trirnd.m0000644000000000000000000001242712776476211014513 0ustar 00000000000000## Copyright (C) 2016 Dag Lyberg ## Copyright (C) 1995-2015 Kurt Hornik ## ## This file is part of Octave. ## ## Octave is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or (at ## your option) any later version. ## ## Octave is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
function rnd = trirnd (a, b, c, varargin)

  ## Draw random numbers from the triangular distribution on [A, B] whose
  ## density branches meet at C, by inverting the CDF on uniform samples.
  ## Extra arguments give the output size (a scalar, a row vector of
  ## dimensions, or several scalars); without them the result has the
  ## common size of A, B and C.  Entries with invalid parameters are NaN.

  if (nargin < 3)
    print_usage ();
  endif

  scalar_params = isscalar (a) && isscalar (b) && isscalar (c);
  if (! scalar_params)
    [retval, a, b, c] = common_size (a, b, c);
    if (retval > 0)
      error ("trirnd: A, B, and C must be of common size or scalars");
    endif
  endif

  if (iscomplex (a) || iscomplex (b) || iscomplex (c))
    error ("trirnd: A, B, and C must not be complex");
  endif

  ## work out the requested output size
  n_size_args = numel (varargin);
  if (n_size_args == 0)
    sz = size (a);
  elseif (n_size_args == 1)
    dims = varargin{1};
    if (isscalar (dims) && dims >= 0)
      sz = [dims, dims];
    elseif (isrow (dims) && all (dims >= 0))
      sz = dims;
    else
      error ("trirnd: dimension vector must be row vector of non-negative integers");
    endif
  else
    bad_dim = cellfun (@(x) (! isscalar (x) || x < 0), varargin);
    if (any (bad_dim))
      error ("trirnd: dimensions must be non-negative integers");
    endif
    sz = [varargin{:}];
  endif

  if (! isscalar (a) && ! isequal (size (b), sz))
    error ("trirnd: A, B, and C must be scalar or of size SZ");
  endif

  if (isa (a, "single") || isa (b, "single") || isa (c, "single"))
    cls = "single";
  else
    cls = "double";
  endif

  if (scalar_params)
    params_ok = ((-Inf < a) && (a < b) && (a <= c) && (c <= b) && (b < Inf));
    if (! params_ok)
      rnd = NaN (sz, cls);
    else
      w = b-a;
      left_width = c-a;
      right_width = b-c;
      h = 2 / w;
      left_area = h * left_width / 2;   # probability mass to the left of c
      rnd = rand (sz, cls);
      lower = rnd < left_area;
      rnd(lower) = a + (rnd(lower) * w * left_width).^0.5;
      rnd(~lower) = b - ((1-rnd(~lower)) * w * right_width).^0.5;
    endif
  else
    w = b-a;
    left_width = c-a;
    right_width = b-c;
    h = 2 ./ w;
    left_area = h .* left_width / 2;    # probability mass to the left of c
    rnd = rand (sz, cls);
    lower = rnd < left_area;
    rnd(lower) = a(lower) + (rnd(lower) .* w(lower) .* left_width(lower)).^0.5;
    rnd(~lower) = b(~lower) - ((1-rnd(~lower)) .* w(~lower) .* right_width(~lower)).^0.5;

    ## blank out the entries whose parameters are not a valid triangle
    invalid = ! (-Inf < a) | ! (a < b) | ! (a <= c) | ! (c <= b) | ! (b < Inf);
    rnd(invalid) = NaN;
  endif

endfunction
## -*- texinfo -*-
## @deftypefn {Function File} {[@var{m}, @var{v}] =} tstat (@var{n})
## Compute mean and variance of the t (Student) distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{n} is the parameter (degrees of freedom) of the t (Student)
## distribution.  The elements of @var{n} must be positive
## @end itemize
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{m} is the mean of the t (Student) distribution.  It is 0 for
## @code{1 < @var{n} < Inf} and @code{NaN} otherwise
##
## @item
## @var{v} is the variance of the t (Student) distribution.  It is
## @code{@var{n} / (@var{n} - 2)} for @code{2 < @var{n} < Inf}, @code{Inf}
## for @code{1 < @var{n} <= 2}, and @code{NaN} otherwise
## @end itemize
##
## @subheading Example
##
## @example
## @group
## n = 3:8;
## [m, v] = tstat (n)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken <asnelt@asnelt.org>
## Description: Moments of the t (Student) distribution

function [m, v] = tstat (n)

  # Check arguments
  if (nargin != 1)
    print_usage ();
  endif

  if (! isempty (n) && ! ismatrix (n))
    error ("tstat: n must be a numeric matrix");
  endif

  # Calculate moments
  m = zeros (size (n));
  v = n ./ (n - 2);

  # Neither moment is defined for n <= 1, NaN or non-finite n.  The negated
  # comparisons make NaN inputs land in this mask.
  undefined = ! (n > 1) | ! (n < Inf);
  m(undefined) = NaN;
  v(undefined) = NaN;

  # The variance is infinite only where the mean exists (1 < n <= 2).
  # Restricting the mask to n > 1 keeps the NaN assigned above for n <= 1
  # instead of overwriting it with Inf.
  infinite_variance = (n > 1) & ! (n > 2);
  v(infinite_variance) = Inf;

endfunction

%!test
%! n = 3:8;
%! [m, v] = tstat (n);
%! expected_m = [0, 0, 0, 0, 0, 0];
%! expected_v = [3.0000, 2.0000, 1.6667, 1.5000, 1.4000, 1.3333];
%! assert (m, expected_m);
%! assert (v, expected_v, 0.001);

%!test
%! [m, v] = tstat ([-1, 0, 0.5, 1, 1.5, 2, NaN, Inf]);
%! assert (m, [NaN, NaN, NaN, NaN, 0, 0, NaN, NaN]);
%! assert (v, [NaN, NaN, NaN, NaN, Inf, Inf, NaN, NaN]);
expected_v = [3.0000, 2.0000, 1.6667, 1.5000, 1.4000, 1.3333]; %! assert (m, expected_m); %! assert (v, expected_v, 0.001); statistics-1.3.0/inst/ttest.m0000755000000000000000000001264312776476211014357 0ustar 00000000000000## Copyright (C) 2014 Tony Richardson ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{h}, @var{pval}, @var{ci}, @var{stats}] =} ttest (@var{x}) ## @deftypefnx {Function File} {[@var{h}, @var{pval}, @var{ci}, @var{stats}] =} ttest (@var{x}, @var{m}) ## @deftypefnx {Function File} {[@var{h}, @var{pval}, @var{ci}, @var{stats}] =} ttest (@var{x}, @var{y}) ## @deftypefnx {Function File} {[@var{h}, @var{pval}, @var{ci}, @var{stats}] =} ttest (@var{x}, @var{m}, @var{Name}, @var{Value}) ## @deftypefnx {Function File} {[@var{h}, @var{pval}, @var{ci}, @var{stats}] =} ttest (@var{x}, @var{y}, @var{Name}, @var{Value}) ## Test for mean of a normal sample with known variance. ## ## Perform a T-test of the null hypothesis @code{mean (@var{x}) == ## @var{m}} for a sample @var{x} from a normal distribution with unknown ## mean and unknown std deviation. Under the null, the test statistic ## @var{t} has a Student's t distribution. The default value of ## @var{m} is 0. ## ## If the second argument @var{y} is a vector, a paired-t test of the ## hypothesis @code{mean (@var{x}) = mean (@var{y})} is performed. 
## ## Name-Value pair arguments can be used to set various options. ## @qcode{"alpha"} can be used to specify the significance level ## of the test (the default value is 0.05). @qcode{"tail"} can be used ## to select the desired alternative hypothesis. If the value is ## @qcode{"both"} (default) the null is tested against the two-sided ## alternative @code{mean (@var{x}) != @var{m}}. ## If it is @qcode{"right"} the one-sided alternative @code{mean (@var{x}) ## > @var{m}} is considered. Similarly for @qcode{"left"}, the one-sided ## alternative @code{mean (@var{x}) < @var{m}} is considered. ## When argument @var{x} is a matrix, @qcode{"dim"} can be used to select ## the dimension over which to perform the test. (The default is the ## first non-singleton dimension). ## ## If @var{h} is 0 the null hypothesis is not rejected, if it is 1 the null ## hypothesis is rejected. The p-value of the test is returned in @var{pval}. ## A 100(1-alpha)% confidence interval is returned in @var{ci}. @var{stats} ## is a structure containing the value of the test statistic (@var{tstat}), ## the degrees of freedom (@var{df}) and the sample standard deviation ## (@var{sd}). 
## ## @end deftypefn ## Author: Tony Richardson function [h, p, ci, stats] = ttest(x, my, varargin) % Set default arguments my_default = 0; alpha = 0.05; tail = 'both'; % Find the first non-singleton dimension of x dim = min(find(size(x)~=1)); if isempty(dim), dim = 1; end if (nargin == 1) my = my_default; end i = 1; while ( i <= length(varargin) ) switch lower(varargin{i}) case 'alpha' i = i + 1; alpha = varargin{i}; case 'tail' i = i + 1; tail = varargin{i}; case 'dim' i = i + 1; dim = varargin{i}; otherwise error('Invalid Name argument.',[]); end i = i + 1; end if ~isa(tail, 'char') error('tail argument to vartest2 must be a string\n',[]); end if any(and(~isscalar(my),size(x)~=size(my))) error('Arrays in paired test must be the same size.'); end % Set default values if arguments are present but empty if isempty(my) my = my_default; end % This adjustment allows everything else to remain the % same for both the one-sample t test and paired tests. x = x - my; % Calculate the test statistic value (tval) n = size(x, dim); x_bar = mean(x, dim); stats.tstat = 0; stats.df = n-1; stats.sd = std(x, 0, dim); x_bar_std = stats.sd/sqrt(n); tval = (x_bar)./x_bar_std; stats.tstat = tval; % Based on the "tail" argument determine the P-value, the critical values, % and the confidence interval. switch lower(tail) case 'both' p = 2*(1 - tcdf(abs(tval),n-1)); tcrit = -tinv(alpha/2,n-1); ci = [x_bar-tcrit*x_bar_std; x_bar+tcrit*x_bar_std] + my; case 'left' p = tcdf(tval,n-1); tcrit = -tinv(alpha,n-1); ci = [-inf*ones(size(x_bar)); my+x_bar+tcrit*x_bar_std]; case 'right' p = 1 - tcdf(tval,n-1); tcrit = -tinv(alpha,n-1); ci = [my+x_bar-tcrit*x_bar_std; inf*ones(size(x_bar))]; otherwise error('Invalid fifth (tail) argument to ttest\n',[]); end % Reshape the ci array to match MATLAB shaping if and(isscalar(x_bar), dim==2) ci = ci(:)'; elseif size(x_bar,2). 
## -*- texinfo -*- ## @deftypefn {Function File} {[@var{h}, @var{pval}, @var{ci}, @var{stats}] =} ttest2 (@var{x}, @var{y}) ## @deftypefnx {Function File} {[@var{h}, @var{pval}, @var{ci}, @var{stats}] =} ttest2 (@var{x}, @var{y}, @var{Name}, @var{Value}) ## Test for equal means of two normal samples with unknown variances. ## ## Perform a T-test of the null hypothesis @code{mean (@var{x}) == ## mean (@var{y})} for two samples @var{x} and @var{y} from normal distributions with unknown ## means and unknown std deviations. Under the null, the test statistic ## @var{t} has a Student's t distribution. ## ## The two samples are treated as independent (not paired); the number of ## observations in @var{x} and @var{y} along the tested dimension may differ. ## ## The argument @qcode{"alpha"} can be used to specify the significance level ## of the test (the default value is 0.05). The string ## argument @qcode{"tail"} can be used to select the desired alternative ## hypothesis. If @qcode{"tail"} is @qcode{"both"} (default) the null is ## tested against the two-sided alternative @code{mean (@var{x}) != mean (@var{y})}. ## If @qcode{"tail"} is @qcode{"right"} the one-sided ## alternative @code{mean (@var{x}) > mean (@var{y})} is considered. ## Similarly for @qcode{"left"}, the one-sided alternative @code{mean ## (@var{x}) < mean (@var{y})} is considered. When @qcode{"vartype"} is @qcode{"equal"} ## the variances are assumed to be equal (this is the default). When ## @qcode{"vartype"} is @qcode{"unequal"} the variances are not assumed equal. ## When argument @var{x} is a matrix the @qcode{"dim"} argument can be ## used to select the dimension over which to perform the test. ## (The default is the first non-singleton dimension.) ## ## If @var{h} is 0 the null hypothesis is not rejected, if it is 1 the null ## hypothesis is rejected. The p-value of the test is returned in @var{pval}. ## A 100(1-alpha)% confidence interval for the difference of the means is returned in @var{ci}. 
@var{stats} ## is a structure containing the value of the test statistic (@var{tstat}), ## the degrees of freedom (@var{df}) and the sample standard deviation ## (@var{sd}). ## ## @end deftypefn ## Author: Tony Richardson function [h, p, ci, stats] = ttest2(x, y, varargin) alpha = 0.05; tail = 'both'; vartype = 'equal'; % Find the first non-singleton dimension of x dim = min(find(size(x)~=1)); if isempty(dim), dim = 1; end i = 1; while ( i <= length(varargin) ) switch lower(varargin{i}) case 'alpha' i = i + 1; alpha = varargin{i}; case 'tail' i = i + 1; tail = varargin{i}; case 'vartype' i = i + 1; vartype = varargin{i}; case 'dim' i = i + 1; dim = varargin{i}; otherwise error('Invalid Name argument.',[]); end i = i + 1; end if ~isa(tail, 'char') error('Tail argument to ttest2 must be a string\n',[]); end m = size(x, dim); n = size(y, dim); x_bar = mean(x,dim)-mean(y,dim); s1_var = var(x, 0, dim); s2_var = var(y, 0, dim); switch lower(vartype) case 'equal' stats.tstat = 0; stats.df = (m + n - 2)*ones(size(x_bar)); sp_var = ((m-1)*s1_var + (n-1)*s2_var)./stats.df; stats.sd = sqrt(sp_var); x_bar_std = sqrt(sp_var*(1/m+1/n)); case 'unequal' stats.tstat = 0; se1 = sqrt(s1_var/m); se2 = sqrt(s2_var/n); sp_var = s1_var/m + s2_var/n; stats.df = ((se1.^2+se2.^2).^2 ./ (se1.^4/(m-1) + se2.^4/(n-1))); stats.sd = [sqrt(s1_var); sqrt(s2_var)]; x_bar_std = sqrt(sp_var); otherwise error('Invalid fifth (vartype) argument to ttest2\n',[]); end stats.tstat = x_bar./x_bar_std; % Based on the "tail" argument determine the P-value, the critical values, % and the confidence interval. 
switch lower(tail) case 'both' p = 2*(1 - tcdf(abs(stats.tstat),stats.df)); tcrit = -tinv(alpha/2,stats.df); %ci = [x_bar-tcrit*stats.sd; x_bar+tcrit*stats.sd]; ci = [x_bar-tcrit.*x_bar_std; x_bar+tcrit.*x_bar_std]; case 'left' p = tcdf(stats.tstat,stats.df); tcrit = -tinv(alpha,stats.df); ci = [-inf*ones(size(x_bar)); x_bar+tcrit.*x_bar_std]; case 'right' p = 1 - tcdf(stats.tstat,stats.df); tcrit = -tinv(alpha,stats.df); ci = [x_bar-tcrit.*x_bar_std; inf*ones(size(x_bar))]; otherwise error('Invalid fourth (tail) argument to ttest2\n',[]); end % Reshape the ci array to match MATLAB shaping if and(isscalar(x_bar), dim==2) ci = ci(:)'; stats.sd = stats.sd(:)'; elseif size(x_bar,2) ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{m}, @var{v}] =} unidstat (@var{n}) ## Compute mean and variance of the discrete uniform distribution. ## ## @subheading Arguments ## ## @itemize @bullet ## @item ## @var{n} is the parameter of the discrete uniform distribution. 
## -*- texinfo -*-
## @deftypefn {Function File} {[@var{m}, @var{v}] =} unidstat (@var{n})
## Compute mean and variance of the discrete uniform distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{n} is the parameter of the discrete uniform distribution.  The elements
## of @var{n} must be positive natural numbers
## @end itemize
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{m} is the mean of the discrete uniform distribution
##
## @item
## @var{v} is the variance of the discrete uniform distribution
## @end itemize
##
## Elements of @var{n} that are not positive finite integers give @code{NaN}
## in both outputs.
##
## @subheading Example
##
## @example
## @group
## n = 1:6;
## [m, v] = unidstat (n)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken <asnelt@asnelt.org>
## Description: Moments of the discrete uniform distribution

function [m, v] = unidstat (n)

  ## Validate the call
  if (nargin != 1)
    print_usage ();
  endif

  if (! (isempty (n) || ismatrix (n)))
    error ("unidstat: n must be a numeric matrix");
  endif

  ## A valid parameter is a positive, finite integer.  NaN fails every
  ## comparison and is therefore treated as invalid.
  is_valid = (n > 0) & (n < Inf) & (n == round (n));

  ## Closed-form moments, computed everywhere and masked afterwards
  m = (n + 1) / 2;
  v = (n .^ 2 - 1) / 12;

  m(! is_valid) = NaN;
  v(! is_valid) = NaN;

endfunction

%!test
%! n = 1:6;
%! [m, v] = unidstat (n);
%! expected_m = [1.0000, 1.5000, 2.0000, 2.5000, 3.0000, 3.5000];
%! expected_v = [0.0000, 0.2500, 0.6667, 1.2500, 2.0000, 2.9167];
%! assert (m, expected_m, 0.001);
%! assert (v, expected_v, 0.001);
## -*- texinfo -*-
## @deftypefn {Function File} {[@var{m}, @var{v}] =} unifstat (@var{a}, @var{b})
## Compute mean and variance of the continuous uniform distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{a} is the first parameter of the continuous uniform distribution
##
## @item
## @var{b} is the second parameter of the continuous uniform distribution
## @end itemize
## @var{a} and @var{b} must be of common size or one of them must be scalar
## and @var{a} must be less than @var{b}
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{m} is the mean of the continuous uniform distribution
##
## @item
## @var{v} is the variance of the continuous uniform distribution
## @end itemize
##
## Wherever @var{a} or @var{b} is not finite, or @var{a} is not less than
## @var{b}, both outputs are @code{NaN}.
##
## @subheading Examples
##
## @example
## @group
## a = 1:6;
## b = 2:2:12;
## [m, v] = unifstat (a, b)
## @end group
##
## @group
## [m, v] = unifstat (a, 10)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken <asnelt@asnelt.org>
## Description: Moments of the continuous uniform distribution

function [m, v] = unifstat (a, b)

  ## Validate the call
  if (nargin != 2)
    print_usage ();
  endif

  if (! (isempty (a) || ismatrix (a)))
    error ("unifstat: a must be a numeric matrix");
  endif
  if (! (isempty (b) || ismatrix (b)))
    error ("unifstat: b must be a numeric matrix");
  endif

  ## Expand a scalar parameter to the size of the other one
  if (! (isscalar (a) && isscalar (b)))
    [status, a, b] = common_size (a, b);
    if (status > 0)
      error ("unifstat: a and b must be of common size or scalar");
    endif
  endif

  ## Valid parameters are finite with a < b.  NaN fails every comparison
  ## and is therefore treated as invalid.
  is_valid = (-Inf < a) & (a < b) & (b < Inf);

  ## Closed-form moments, computed everywhere and masked afterwards
  m = (a + b) / 2;
  v = ((b - a) .^ 2) / 12;

  m(! is_valid) = NaN;
  v(! is_valid) = NaN;

endfunction

%!test
%! a = 1:6;
%! b = 2:2:12;
%! [m, v] = unifstat (a, b);
%! expected_m = [1.5000, 3.0000, 4.5000, 6.0000, 7.5000, 9.0000];
%! expected_v = [0.0833, 0.3333, 0.7500, 1.3333, 2.0833, 3.0000];
%! assert (m, expected_m, 0.001);
%! assert (v, expected_v, 0.001);

%!test
%! a = 1:6;
%! [m, v] = unifstat (a, 10);
%! expected_m = [5.5000, 6.0000, 6.5000, 7.0000, 7.5000, 8.0000];
%! expected_v = [6.7500, 5.3333, 4.0833, 3.0000, 2.0833, 1.3333];
%! assert (m, expected_m, 0.001);
%! assert (v, expected_v, 0.001);
## ## The second argument is the hypothesized variance @var{v}; the null hypothesis ## @code{var (@var{x}) == @var{v}} is tested with a chi-square statistic. ## ## The argument @qcode{"alpha"} can be used to specify the significance level ## of the test (the default value is 0.05). The string ## argument @qcode{"tail"} can be used to select the desired alternative ## hypothesis. If @qcode{"tail"} is @qcode{"both"} (default) the null is ## tested against the two-sided alternative @code{var (@var{x}) != @var{v}}. ## If @qcode{"tail"} is @qcode{"right"} the one-sided ## alternative @code{var (@var{x}) > @var{v}} is considered. ## Similarly for @qcode{"left"}, the one-sided alternative @code{var ## (@var{x}) < @var{v}} is considered. ## When argument @var{x} is a matrix the @qcode{"dim"} argument can be ## used to select the dimension over which to perform the test. ## (The default is the first non-singleton dimension.) ## ## If @var{h} is 0 the null hypothesis is not rejected, if it is 1 the null ## hypothesis is rejected. The p-value of the test is returned in @var{pval}. ## A 100(1-alpha)% confidence interval for the variance is returned in @var{ci}. @var{stats} ## is a structure containing the value of the test statistic (@var{chisqstat}), ## the degrees of freedom (@var{df}) and the sample standard deviation ## (@var{sd}). 
## ## @end deftypefn ## Author: Tony Richardson ## Description: Test for mean of a normal sample with known variance function [h, p, ci, stats] = vartest(x, v, varargin) % Set default arguments alpha = 0.05; tail = 'both'; % Find the first non-singleton dimension of x dim = min(find(size(x)~=1)); if isempty(dim), dim = 1; end i = 1; while ( i <= length(varargin) ) switch lower(varargin{i}) case 'alpha' i = i + 1; alpha = varargin{i}; case 'tail' i = i + 1; tail = varargin{i}; case 'dim' i = i + 1; dim = varargin{i}; otherwise error('Invalid Name argument.',[]); end i = i + 1; end if ~isa(tail, 'char') error('tail argument to vartest must be a string\n',[]); end s_var = var(x, 0, dim); df = size(x, dim) - 1; stats.chisqstat = df*s_var/v; % Based on the "tail" argument determine the P-value, the critical values, % and the confidence interval. switch lower(tail) case 'both' p = 2*min(chi2cdf(stats.chisqstat,df),1-chi2cdf(stats.chisqstat,df)); ci = [df*s_var ./ (chi2inv(1-alpha/2,df)); df*s_var ./ (chi2inv(alpha/2,df))]; case 'left' p = chi2cdf(stats.chisqstat,df); chi2crit = chi2inv(alpha,df); ci = [zeros(size(stats.chisqstat)); df*s_var ./ (chi2inv(alpha,df))]; case 'right' p = 1 - chi2cdf(stats.chisqstat,df); chi2crit = chi2inv(1-alpha,df); ci = [df*s_var ./ (chi2inv(1-alpha,df)); inf*ones(size(stats.chisqstat))]; otherwise error('Invalid fourth (tail) argument to vartest\n',[]); end % Reshape the ci array to match MATLAB shaping if and(isscalar(stats.chisqstat), dim==2) ci = ci(:)'; elseif size(stats.chisqstat,2). ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{h}, @var{pval}, @var{ci}, @var{stats}] =} vartest2 (@var{x}, @var{y}) ## @deftypefnx {Function File} {[@var{h}, @var{pval}, @var{ci}, @var{stats}] =} vartest2 (@var{x}, @var{y}, @var{Name}, @var{Value}) ## Perform a F-test for equal variances. ## ## If the second argument @var{y} is a vector, a paired-t test of the ## hypothesis @code{mean (@var{x}) = mean (@var{y})} is performed. 
## ## The argument @qcode{"alpha"} can be used to specify the significance level ## of the test (the default value is 0.05). The string ## argument @qcode{"tail"} can be used to select the desired alternative ## hypothesis. If @qcode{"tail"} is @qcode{"both"} (default) the null is ## tested against the two-sided alternative @code{var (@var{x}) != var (@var{y})}. ## If @qcode{"tail"} is @qcode{"right"} the one-sided ## alternative @code{var (@var{x}) > var (@var{y})} is considered. ## Similarly for @qcode{"left"}, the one-sided alternative @code{var ## (@var{x}) < var (@var{y})} is considered. ## When argument @var{x} is a matrix the @qcode{"dim"} argument can be ## used to select the dimension over which to perform the test. ## (The default is the first non-singleton dimension.) ## ## If @var{h} is 0 the null hypothesis is not rejected, if it is 1 the null ## hypothesis is rejected. The p-value of the test is returned in @var{pval}. ## A 100(1-alpha)% confidence interval for the ratio of the variances is returned in @var{ci}. @var{stats} ## is a structure containing the value of the test statistic (@var{fstat}), ## the degrees of freedom (@var{df}) and the sample standard deviation ## (@var{sd}). 
## ## @end deftypefn ## Author: Tony Richardson ## Description: Test for mean of a normal sample with known variance function [h, p, ci, stats] = vartest2(x, y, varargin) % Set default arguments alpha = 0.05; tail = 'both'; % Find the first non-singleton dimension of x dim = min(find(size(x)~=1)); if isempty(dim), dim = 1; end i = 1; while ( i <= length(varargin) ) switch lower(varargin{i}) case 'alpha' i = i + 1; alpha = varargin{i}; case 'tail' i = i + 1; tail = varargin{i}; case 'dim' i = i + 1; dim = varargin{i}; otherwise error('Invalid Name argument.',[]); end i = i + 1; end if ~isa(tail, 'char') error('tail argument to vartest2 must be a string\n',[]); end s1_var = var(x, 0, dim); s2_var = var(y, 0, dim); stats.fstat = s1_var ./ s2_var; df1= size(x, dim) - 1; df2 = size(y, dim) - 1; % Based on the "tail" argument determine the P-value, the critical values, % and the confidence interval. switch lower(tail) case 'both' p = 2*min(fcdf(stats.fstat,df1,df2),1 - fcdf(stats.fstat,df1,df2)); fcrit = finv(1-alpha/2,df1,df2); ci = [s1_var ./ (fcrit*s2_var); fcrit*s1_var ./ s2_var]; case 'left' p = fcdf(stats.fstat,df1,df2); fcrit = finv(alpha,df1,df2); ci = [zeros(size(stats.fstat)); s1_var ./ (fcrit*s2_var)]; case 'right' p = 1 - fcdf(stats.fstat,df1,df2); fcrit = finv(1-alpha,df1,df2); ci = [s1_var ./ (fcrit*s2_var); inf*ones(size(stats.fstat))]; otherwise error('Invalid fourth (tail) argument to vartest2\n',[]); end % Reshape the ci array to match MATLAB shaping if and(isscalar(stats.fstat), dim==2) ci = ci(:)'; elseif size(stats.fstat,2). ## Author: Juan Pablo Carbajal ## -*- texinfo -*- ## @defun {@var{h} =} violin (@var{x}) ## @defunx {@var{h} =} violin (@dots{}, @var{property}, @var{value}, @dots{}) ## @defunx {@var{h} =} violin (@var{hax}, @dots{}) ## @defunx {@var{h} =} violin (@dots{}, @asis{"horizontal"}) ## Produce a Violin plot of the data @var{x}. ## ## The input data @var{x} can be a N-by-m array containg N observations of m variables. 
## It can also be a cell with m elements, for the case in which the variables ## are not uniformly sampled. ## ## The following @var{property} can be set using @var{property}/@var{value} pairs ## (default values in parentheses). ## The value of the property can be a scalar indicating that it applies ## to all the variables in the data. ## It can also be a cell/array, indicating the property for each variable. ## In this case it should have m columns (as many as variables). ## ## @table @asis ## ## @item Color ## (@asis{"y"}) Indicates the filling color of the violins. ## ## @item Nbins ## (50) Internally, the function calls @command{hist} to compute the histogram of the data. ## This property indicates how many bins to use. See @command{help hist} ## for more details. ## ## @item SmoothFactor ## (4) The function performs simple kernel density estimation and automatically ## finds the bandwidth of the kernel function that best approximates the histogram ## using optimization (@command{sqp}). ## The result is in general very noisy. To smooth the result the bandwidth is ## multiplied by the value of this property. The higher the value the smoother ## the violins, but values too high might remove features from the data distribution. ## ## @item Bandwidth ## (NA) If this property is given a value other than NA, it sets the bandwidth of the ## kernel function. No optimization is performed and the property @asis{SmoothFactor} ## is ignored. ## ## @item Width ## (0.5) Sets the maximum width of the violins. Violins are centered at integer axis ## values. The distance between the middle axes of two adjacent violins is 1. Setting a value ## higher than 1 in this property will cause the violins to overlap. ## @end table ## ## If the string @asis{"Horizontal"} is among the input arguments, the violin ## plot is rendered along the x axis with the variables in the y axis. 
function h = violin (ax, varargin)

  if (nargin < 1)
    print_usage ();
  endif

  ## Separate the optional leading axes handle from the data.  This must
  ## happen before option parsing: in the "violin (hax, x, ...)" form the data
  ## would otherwise be handed to the parser as if it were an option name.
  if (isscalar (ax) && ishandle (ax))
    if (isempty (varargin))
      print_usage ();
    endif
    x = varargin{1};
    varargin(1) = [];
    old_hold = ishold (ax);
  else
    # First argument is not an axis: it is the data, plot into current axes
    x  = ax;
    ax = gca ();
    old_hold = ishold (ax);
    if (! old_hold)
      cla (ax);
    endif
  endif

  ######################
  ## Parse parameters ##
  parser = inputParser ();
  parser.CaseSensitive = false;
  parser.FunctionName = 'violin';

  parser.addParamValue ('Nbins', 50);
  parser.addParamValue ('SmoothFactor', 4);
  parser.addParamValue ('Bandwidth', NA);
  parser.addParamValue ('Width', 0.5);
  parser.addParamValue ('Color', "y");
  parser.addSwitch ('Horizontal');

  parser.parse (varargin{:});
  res = parser.Results;

  c = res.Color;                # Color of violins
  if (ischar (c))
    c = c(:);                   # one color character per variable
  endif
  nb       = res.Nbins;         # Number of bins in histogram
  sf       = res.SmoothFactor;  # Smoothing factor for kernel estimation
  r0       = res.Bandwidth;     # User value for KDE bandwidth to prevent optimization
  is_horiz = res.Horizontal;    # Whether the plot must be rotated
  width    = res.Width;         # Width of the violins
  clear parser res
  ######################

  ## Make everything a cell for code simplicity
  if (! iscell (x))
    [N, Nc] = size (x);
    x = mat2cell (x, N, ones (1, Nc));
  else
    Nc = numel (x);
  endif

  ## Expand every option to one value per variable.  to_cell reports the
  ## position of the offending option through the error message.
  try
    [nb, c, sf, r0, width] = to_cell (nb, c, sf, r0, width, Nc);
  catch err
    if (strcmp (err.identifier, "to_cell:element_idx"))
      n = str2num (err.message);
      txt = {"Nbins", "Color", "SmoothFactor", "Bandwidth", "Width"};
      error ("Octave:invalid-input-arg", ...
             ["options should be scalars or cell/array with as many values as" ...
              " numbers of variables in the data (wrong size of %s)."], txt{n});
    else
      rethrow (err);
    endif
  end_try_catch

  ## Build violins
  [px, py, mx] = cellfun (@(y,n,s,r) build_polygon (y, n, s, r), ...
                          x, nb, sf, r0, "unif", 0);

  Nc  = 1:numel (px);
  Ncc = mat2cell (Nc, 1, ones (1, Nc(end)));

  # Draw plain violins, the n-th one centered at axis position n
  tmp = cellfun (@(x,y,n,u,w) patch (ax, (w * x + n)(:), y(:), u), ...
                 px, py, Ncc, c, width);
  h.violin = tmp;

  ## Hold the axes being drawn into (not merely the current axes) so that
  ## the overlays below do not replace the patches.
  hold (ax, "on");

  # Overlay mean value
  tmp = cellfun (@(z,y) plot (ax, z, y, '.k', "markersize", 6), Ncc, mx);
  h.mean = tmp;

  # Overlay median
  Mx = cellfun (@median, x, "unif", 0);
  tmp = cellfun (@(z,y) plot (ax, z, y, 'ok'), Ncc, Mx);
  h.median = tmp;

  # Overlay 1st and 3rd quartiles as error bars around the median
  LUBU = cellfun (@(x,y) abs (quantile (x, [0.25 0.75]) - y), x, Mx, "unif", 0);
  tmp = cellfun (@(x,y,z) errorbar (ax, x, y, z(1), z(2)), Ncc, Mx, LUBU)(:);
  # Flatten errorbar output handles
  tmp2 = allchild (tmp);
  if (! iscell (tmp2))
    tmp2 = mat2cell (tmp2, ones (length (tmp2), 1), 1);
  endif
  tmp = mat2cell (tmp, ones (length (tmp), 1), 1);
  tmp = cellfun (@vertcat, tmp, tmp2, "unif", 0);
  h.quartile = cell2mat (tmp);

  hold (ax, "off");

  # Rotate the plot if it is horizontal
  if (is_horiz)
    structfun (@swap_axes, h);
    set (ax, "ytick", Nc);
  else
    set (ax, "xtick", Nc);
  endif

  if (nargout < 1)
    clear h;
  endif

  # restore hold state
  if (old_hold)
    hold (ax, "on");
  endif

endfunction

## Gaussian kernel density estimate, normalized to a maximum of 1.
## The columns of X hold the differences between the data and one evaluation
## point each; R is the kernel bandwidth.
function k = kde (x, r)
  k = mean (stdnormal_pdf (x / r)) / r;
  k /= max (k);
endfunction

## Build the outline of one violin.
## X is the data, NB the number of histogram bins, SF the smoothing factor and
## R the bandwidth (NA to fit it to the histogram).  Returns the half-widths
## PX (right and mirrored left side), the matching ordinates PY and the sample
## mean MX.
function [px, py, mx] = build_polygon (x, nb, sf, r)

  N = size (x, 1);
  mx = mean (x);
  sx = std (x);
  X = (x - mx) / sx;            # standardized data

  [count, bin] = hist (X, nb);
  count /= max (count);

  X = X - bin;                  # broadcast: distance of each datum to each bin center
  if (isna (r))
    ## Fit the bandwidth so that the KDE matches the histogram.
    ## NOTE(review): rule-of-thumb bandwidths scale as N^(-1/5); the positive
    ## exponent here only sets the optimizer's starting point -- confirm intent.
    r0 = 1.06 * N^(1/5);
    r  = sqp (r0, @(r) sumsq (kde (X, r) - count), [], [], 1e-3, 1e2);
  else
    ## A user-supplied bandwidth is used as is; SmoothFactor is ignored.
    sf = 1;
  endif

  v = kde (X, sf * r).';

  px  = [v -flipud(v)];
  bin = bin * sx + mx;          # back to data units
  py  = [bin; fliplr(bin)].';

endfunction

## Exchange the x and y data of every graphics object in H.
## Always returns true so it can be used with structfun.
function tf = swap_axes (h)

  tmp  = mat2cell (h(:), ones (length (h), 1), 1);
  tmpy = cellfun (@(x) get (x, "ydata"), tmp, "unif", 0);
  tmpx = cellfun (@(x) get (x, "xdata"), tmp, "unif", 0);
  cellfun (@(h,x,y) set (h, "xdata", y, "ydata", x), tmp, tmpx, tmpy);
  tf = true;

endfunction

## Convert each option into a cell with M elements, M being the last argument.
## Scalars are replicated; arrays are split so that each cell holds the values
## for one variable.  Raises "to_cell:element_idx" with the 1-based position of
## the offending argument as message when no dimension matches M.
function varargout = to_cell (varargin)

  m = varargin{end};
  varargin(end) = [];

  for i = 1:numel (varargin)
    x = varargin{i};
    if (isscalar (x))
      x = repmat (x, m, 1);
    endif

    if (iscell (x))
      if (numel (x) != m)       # wrong number of elements
        error ("to_cell:element_idx", "%d\n", i);
      endif
      varargout{i} = x;
      continue
    endif

    sz = size (x);
    d  = find (sz == m);
    if (isempty (d))            # no dimension equals m
      error ("to_cell:element_idx", "%d\n", i);
    elseif (length (d) == 2)
      # both dims are m: keep the layout, one column per variable
    elseif (d == 1)
      # 1st dimension is m --> transpose so that there is one column per variable
      x  = x.';
      sz = fliplr (sz);
    endif

    varargout{i} = mat2cell (x, sz(1), ones (m, 1));
  endfor

endfunction
violin (data, "nbins", [5,10,50,100], "smoothfactor", [4 4 8 10]); %! axis ([0 5 0 max(data(:))]) statistics-1.3.0/inst/vmpdf.m0000755000000000000000000000303712776476211014325 0ustar 00000000000000## Copyright (C) 2009 Soren Hauberg ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see <http://www.gnu.org/licenses/>. ## -*- texinfo -*- ## @deftypefn {Function File} @var{p} = vmpdf (@var{x}, @var{mu}, @var{k}) ## Evaluates the Von Mises probability density function. ## ## The Von Mises distribution has probability density function ## @example ## f (@var{x}) = exp (@var{k} * cos (@var{x} - @var{mu})) / @var{Z} , ## @end example ## where @var{Z} is a normalisation constant. By default, @var{mu} is 0 and ## @var{k} is 1. 
## @seealso{vmrnd}
## @end deftypefn

function p = vmpdf (x, mu = 0, k = 1)

  ## Check input
  if (nargin < 1)
    print_usage ();
  endif
  if (! isreal (x))
    error ("vmpdf: first input must be real");
  endif
  if (! isreal (mu))
    error ("vmpdf: second input must be real");
  endif
  ## k must be a scalar: the normalisation below divides by Z with "/",
  ## which would turn into a matrix division for a non-scalar k.
  if (! isscalar (k) || ! isreal (k) || k <= 0)
    error ("vmpdf: third input must be a real positive scalar");
  endif

  ## Evaluate PDF: exp (k * cos (x - mu)) normalised by 2 * pi * I0 (k),
  ## where I0 is the modified Bessel function of the first kind of order 0.
  Z = 2 * pi * besseli (0, k);
  p = exp (k * cos (x - mu)) / Z;

endfunction
statistics-1.3.0/inst/vmrnd.m0000755000000000000000000000474012776476211014341 0ustar 00000000000000
## Copyright (C) 2009 Soren Hauberg
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} @var{theta} = vmrnd (@var{mu}, @var{k})
## @deftypefnx{Function File} @var{theta} = vmrnd (@var{mu}, @var{k}, @var{sz})
## Draw random angles from a Von Mises distribution with mean @var{mu} and
## concentration @var{k}.
##
## The Von Mises distribution has probability density function
## @example
## f (@var{x}) = exp (@var{k} * cos (@var{x} - @var{mu})) / @var{Z} ,
## @end example
## where @var{Z} is a normalisation constant.
##
## The output, @var{theta}, is a matrix of size @var{sz} containing random angles
## drawn from the given Von Mises distribution. By default, @var{mu} is 0
## and @var{k} is 1.
## @seealso{vmpdf}
## @end deftypefn

function theta = vmrnd (mu = 0, k = 1, sz = 1)

  ## Check input
  if (! isreal (mu))
    error ("vmrnd: first input must be real");
  endif
  ## k must be a scalar: the sampler below uses "b^2" and scalar divisions.
  if (! isscalar (k) || ! isreal (k) || k <= 0)
    error ("vmrnd: second input must be a real positive scalar");
  endif

  if (isscalar (sz))
    sz = [sz, sz];
  elseif (! isvector (sz))
    error ("vmrnd: third input must be a scalar or a vector");
  endif

  N = prod (sz);
  if (N == 0)
    ## Nothing to draw: return an empty result of the requested size.
    theta = zeros (sz);
    return;
  endif

  ## Simulate!
  if (k < 1e-6)
    ## k is small: sample uniformly on circle
    theta = 2 * pi * rand (sz) - pi;
  else
    a = 1 + sqrt (1 + 4 * k.^2);
    b = (a - sqrt (2 * a)) / (2 * k);
    r = (1 + b^2) / (2 * b);

    ## Rejection sampling: every pass redraws only the entries that have not
    ## been accepted yet.  Row 1 and 2 of u drive the proposal and the
    ## acceptance test, row 3 decides the sign of the angle.
    u = zeros (3, N);
    z = zeros (1, N);
    f = zeros (1, N);
    c = zeros (1, N);
    notdone = true (1, N);
    pending = N;
    while (pending > 0)
      u(:, notdone) = rand (3, pending);

      z(notdone) = cos (pi * u(1, notdone));
      f(notdone) = (1 + r * z(notdone)) ./ (r + z(notdone));
      c(notdone) = k * (r - f(notdone));

      ## An entry is rejected only when both acceptance tests fail.  Entries
      ## accepted earlier keep their values and therefore stay accepted.
      notdone = (u(2, :) >= c .* (2 - c)) ...
                & (log (c) - log (u(2, :)) + 1 - c < 0);
      pending = sum (notdone);
    endwhile

    theta = mu + sign (u(3, :) - 0.5) .* acos (f);
    theta = reshape (theta, sz);
  endif

endfunction
statistics-1.3.0/inst/wblstat.m0000755000000000000000000000667712776476211014706 0ustar 00000000000000
## Copyright (C) 2006, 2007 Arno Onken
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{m}, @var{v}] =} wblstat (@var{scale}, @var{shape})
## Compute mean and variance of the Weibull distribution.
##
## @subheading Arguments
##
## @itemize @bullet
## @item
## @var{scale} is the scale parameter of the Weibull distribution.
## @var{scale} must be positive
##
## @item
## @var{shape} is the shape parameter of the Weibull distribution.
## @var{shape} must be positive
## @end itemize
## @var{scale} and @var{shape} must be of common size or one of them must be
## scalar
##
## @subheading Return values
##
## @itemize @bullet
## @item
## @var{m} is the mean of the Weibull distribution
##
## @item
## @var{v} is the variance of the Weibull distribution
## @end itemize
##
## @subheading Examples
##
## @example
## @group
## scale = 3:8;
## shape = 1:6;
## [m, v] = wblstat (scale, shape)
## @end group
##
## @group
## [m, v] = wblstat (6, shape)
## @end group
## @end example
##
## @subheading References
##
## @enumerate
## @item
## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
## 2001.
##
## @item
## Athanasios Papoulis. @cite{Probability, Random Variables, and Stochastic
## Processes}. McGraw-Hill, New York, second edition, 1984.
## @end enumerate
## @end deftypefn

## Author: Arno Onken
## Description: Moments of the Weibull distribution

function [m, v] = wblstat (scale, shape)

  ## Exactly two arguments are required.
  if (nargin != 2)
    print_usage ();
  endif

  ## Each parameter has to be empty or a matrix.
  if (! (isempty (scale) || ismatrix (scale)))
    error ("wblstat: scale must be a numeric matrix");
  endif
  if (! (isempty (shape) || ismatrix (shape)))
    error ("wblstat: shape must be a numeric matrix");
  endif

  ## Unless both parameters are scalars, bring them to a common size.
  if (! (isscalar (scale) && isscalar (shape)))
    [size_err, scale, shape] = common_size (scale, shape);
    if (size_err > 0)
      error ("wblstat: scale and shape must be of common size or scalar");
    endif
  endif

  ## Mean, and variance as the second raw moment minus the squared mean.
  m = scale .* gamma (1 + 1 ./ shape);
  second_moment = (scale .^ 2) .* gamma (1 + 2 ./ shape);
  v = second_moment - m .^ 2;

  ## Parameters that are not positive and finite (including NaN) yield NaN.
  invalid = ! (scale > 0) | ! (scale < Inf) | ! (shape > 0) | ! (shape < Inf);
  m(invalid) = NaN;
  v(invalid) = NaN;

endfunction

%!test
%! scale = 3:8;
%! shape = 1:6;
%! [m, v] = wblstat (scale, shape);
%! expected_m = [3.0000, 3.5449, 4.4649, 5.4384, 6.4272, 7.4218];
%! expected_v = [9.0000, 3.4336, 2.6333, 2.3278, 2.1673, 2.0682];
%! assert (m, expected_m, 0.001);
%! assert (v, expected_v, 0.001);

%!test
%! shape = 1:6;
%! [m, v] = wblstat (6, shape);
%! expected_m = [ 6.0000, 5.3174, 5.3579, 5.4384, 5.5090, 5.5663];
%! expected_v = [36.0000, 7.7257, 3.7920, 2.3278, 1.5923, 1.1634];
%! assert (m, expected_m, 0.001);
%! assert (v, expected_v, 0.001);
statistics-1.3.0/inst/wishpdf.m0000755000000000000000000000542212776476211014655 0ustar 00000000000000
## Copyright (C) 2013 Nir Krakauer
##
## This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License along with Octave; see the file COPYING. If not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {} @var{y} = wishpdf (@var{W}, @var{Sigma}, @var{df}, @var{log_y}=false)
## Compute the probability density function of the Wishart distribution
##
## Inputs: A @var{p} x @var{p} matrix @var{W} where to find the PDF. The @var{p} x @var{p} positive definite matrix @var{Sigma} and scalar degrees of freedom parameter @var{df} characterizing the Wishart distribution. (For the density to be finite, need @var{df} > (@var{p} - 1).)
## If the flag @var{log_y} is set, return the log probability density -- this helps avoid underflow when the numerical value of the density is very small
##
## Output: @var{y} is the probability density of Wishart(@var{Sigma}, @var{df}) at @var{W}.
##
## @seealso{wishrnd, iwishpdf}
## @end deftypefn

## Author: Nir Krakauer
## Description: Compute the probability density function of the Wishart distribution

function [y] = wishpdf(W, Sigma, df, log_y=false)

  if (nargin < 3)
    print_usage ();
  endif

  p = size(Sigma, 1);

  if (df <= (p - 1))
    error('df too small, no finite densities exist')
  endif

  #calculate the logarithm of G_d(df/2), the multivariate gamma function
  #gammaln is used instead of log(gamma(.)) because gamma(.) overflows to Inf
  #for large df, which would turn the density into 0 (or -Inf on the log scale)
  g = (p * (p-1) / 4) * log(pi) + sum(gammaln((df + 1 - (1:p)) / 2));

  C = chol(Sigma);

  #use formulas for determinant of positive definite matrix for better efficiency and numerical accuracy
  logdet_W = 2*sum(log(diag(chol(W))));
  logdet_Sigma = 2*sum(log(diag(C)));

  #log density; chol2inv(C) is the inverse of Sigma computed from its Cholesky factor
  y = -(df*p)/2 * log(2) - (df/2)*logdet_Sigma - g + ((df - p - 1)/2)*logdet_W - trace(chol2inv(C)*W)/2;

  if ~log_y
    y = exp(y);
  endif

endfunction

##test results cross-checked against dwish function in R MCMCpack library
%!assert(wishpdf(4, 3, 3.1), 0.07702496, 1E-7);
%!assert(wishpdf([2 -0.3;-0.3 4], [1 0.3;0.3 1], 4), 0.004529741, 1E-7);
%!assert(wishpdf([6 2 5; 2 10 -5; 5 -5 25], [9 5 5; 5 10 -8; 5 -8 22], 5.1), 4.474865e-10, 1E-15);

%% Test input validation
%!error wishpdf ()
%!error wishpdf (1, 2)
%!error wishpdf (1, 2, 0)

%!error wishpdf (1, 2)
statistics-1.3.0/inst/wishrnd.m0000755000000000000000000000637412776476211014676 0ustar 00000000000000
## Copyright (C) 2013 Nir Krakauer
##
## This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
## ## Octave is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License along with Octave; see the file COPYING. If not, see . ## -*- texinfo -*- ## @deftypefn {Function File} {} [@var{W}[, @var{D}]] = wishrnd (@var{Sigma}, @var{df}[, @var{D}][, @var{n}=1]) ## Return a random matrix sampled from the Wishart distribution with given parameters ## ## Inputs: the @var{p} x @var{p} positive definite matrix @var{Sigma} and scalar degrees of freedom parameter @var{df} (and optionally the Cholesky factor @var{D} of @var{Sigma}). ## @var{df} can be non-integer as long as @var{df} > @var{p} ## ## Output: a random @var{p} x @var{p} matrix @var{W} from the Wishart(@var{Sigma}, @var{df}) distribution. If @var{n} > 1, then @var{W} is @var{p} x @var{p} x @var{n} and holds @var{n} such random matrices. (Optionally, the Cholesky factor @var{D} of @var{Sigma} is also returned.) 
##
## Averaged across many samples, the mean of @var{W} should approach @var{df}*@var{Sigma}, and the variance of each element @var{W}_ij should approach @var{df}*(@var{Sigma}_ij^2 + @var{Sigma}_ii*@var{Sigma}_jj)
##
## Reference: Yu-Cheng Ku and Peter Bloomfield (2010), Generating Random Wishart Matrices with Fractional Degrees of Freedom in OX, http://www.gwu.edu/~forcpgm/YuChengKu-030510final-WishartYu-ChengKu.pdf
##
## @seealso{iwishrnd, wishpdf}
## @end deftypefn

## Author: Nir Krakauer
## Description: Draw random matrices from the Wishart distribution

function [W, D] = wishrnd(Sigma, df, D, n=1)

  if (nargin < 2)
    print_usage ();
  endif

  #D is the upper triangular Cholesky factor, D'*D = Sigma
  if nargin < 3 || isempty(D)
    try
      D = chol(Sigma);
    catch
      error('wishrnd: Cholesky decomposition failed; Sigma probably not positive definite')
    end_try_catch
  endif

  p = size(D, 1);

  if df < p
    df = floor(df); #distribution not defined for small noninteger df
    df_isint = 1;
  else
    #check for integer degrees of freedom
    df_isint = (df == floor(df));
  endif

  if ~df_isint
    #row and column subscripts of every element, used to address the lower triangle
    [ii, jj] = ind2sub([p, p], 1:(p*p));
  endif

  #preallocate; also gives a well defined (empty) result for n = 0
  W = nan(p, p, n);

  for i = 1:n
    if df_isint
      #sum of df outer products of normal vectors with covariance D'*D
      Z = randn(df, p) * D;
      W(:, :, i) = Z'*Z;
    else
      #Bartlett decomposition: lower triangular Z with chi distributed diagonal
      #2*randg(x/2) draws chi-square variates with x degrees of freedom (same as chi2rnd(x))
      Z = diag(sqrt(2 * randg((df - (0:(p-1))) / 2))); #fill diagonal
      Z(ii > jj) = randn(p*(p-1)/2, 1); #fill lower triangle with normally distributed variates
      #Z*Z' is Wishart(I, df), so the LOWER factor D' is needed on the left for
      #the result to be Wishart(D'*D, df) = Wishart(Sigma, df)
      Z = D' * Z;
      W(:, :, i) = Z*Z';
    endif
  endfor

endfunction

%!assert(size (wishrnd (1,2,1)), [1, 1]);
%!assert(size (wishrnd ([],2,1)), [1, 1]);
%!assert(size (wishrnd ([3 1; 1 3], 2.00001, [], 1)), [2, 2]);
%!assert(size (wishrnd (eye(2), 2, [], 3)), [2, 2, 3]);

%% Test input validation
%!error wishrnd ()
%!error wishrnd (1)
%!error wishrnd ([1; 1], 2)
statistics-1.3.0/inst/ztest.m0000755000000000000000000001021512776476211014356 0ustar 00000000000000
## Copyright (C) 2014 Tony Richardson
##
## This program is free software; you can redistribute it and/or modify it under
## the 
terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see <http://www.gnu.org/licenses/>. ## -*- texinfo -*- ## @deftypefn {Function File} {[@var{h}, @var{pval}, @var{ci}, @var{z}, @var{zcrit}] =} ztest (@var{x}, @var{m}, @var{s}) ## @deftypefnx {Function File} {[@var{h}, @var{pval}, @var{ci}, @var{z}, @var{zcrit}] =} ztest (@var{x}, @var{m}, @var{s}, @var{Name}, @var{Value}) ## Test for mean of a normal sample with known variance. ## ## Perform a Z-test of the null hypothesis @code{mean (@var{x}) == @var{m}} ## for a sample @var{x} from a normal distribution with unknown ## mean and known std deviation @var{s}. Under the null, the test statistic ## @var{z} follows a standard normal distribution. ## ## Name-Value pair arguments can be used to set various options. ## @qcode{"alpha"} can be used to specify the significance level ## of the test (the default value is 0.05). @qcode{"tail"} can be used ## to select the desired alternative hypothesis. If the value is ## @qcode{"both"} (default) the null is tested against the two-sided ## alternative @code{mean (@var{x}) != @var{m}}. ## If it is @qcode{"right"} the one-sided alternative @code{mean (@var{x}) ## > @var{m}} is considered. Similarly for @qcode{"left"}, the one-sided ## alternative @code{mean (@var{x}) < @var{m}} is considered. ## When argument @var{x} is a matrix, @qcode{"dim"} can be used to select ## the dimension over which to perform the test. (The default is the ## first non-singleton dimension.) 
## ## If @var{h} is 0 the null hypothesis is accepted, if it is 1 the null ## hypothesis is rejected. The p-value of the test is returned in @var{pval}. ## A 100(1-alpha)% confidence interval is returned in @var{ci}. The test statistic ## value is returned in @var{z} and the z critical value in @var{zcrit}. ## ## @end deftypefn ## Author: Tony Richardson function [h, p, ci, zval, zcrit] = ztest(x, m, sigma, varargin) alpha = 0.05; tail = 'both'; % Find the first non-singleton dimension of x dim = min(find(size(x)~=1)); if isempty(dim), dim = 1; end i = 1; while ( i <= length(varargin) ) switch lower(varargin{i}) case 'alpha' i = i + 1; alpha = varargin{i}; case 'tail' i = i + 1; tail = varargin{i}; case 'dim' i = i + 1; dim = varargin{i}; otherwise error('Invalid Name argument.',[]); end i = i + 1; end if ~isa(tail, 'char') error('tail argument to ztest must be a string\n',[]); end % Calculate the test statistic value (zval) n = size(x, dim); x_bar = mean(x, dim); x_bar_std = sigma/sqrt(n); zval = (x_bar - m)./x_bar_std; % Based on the "tail" argument determine the P-value, the critical values, % and the confidence interval. 
switch lower(tail) case 'both' p = 2*(1 - normcdf(abs(zval))); zcrit = -norminv(alpha/2); ci = [x_bar-zcrit*x_bar_std; x_bar+zcrit*x_bar_std]; case 'left' p = normcdf(zval); zcrit = -norminv(alpha); ci = [-inf*ones(size(x_bar)); x_bar+zcrit*x_bar_std]; case 'right' p = 1 - normcdf(zval); zcrit = -norminv(alpha); ci = [x_bar-zcrit*x_bar_std; inf*ones(size(x_bar))]; otherwise error('Invalid fifth (tail) argument to ztest\n',[]); end % Reshape the ci array to match MATLAB shaping if and(isscalar(x_bar), dim==2) ci = ci(:)'; elseif size(x_bar,2) octave-statistics www.octave.org-octave.desktop Statistics Additional statistics functions for GNU Octave statistics cross-validation probability distributions regression data fitting ANOVA boxplot dendrogram Hidden Markov models Hypothesis testing randomization machine learning PCA principal component analysis k-means clustering http://octave.sourceforge.net/statistics https://savannah.gnu.org/bugs/?func=additem&group=octave GPL-3.0+ maintainers@octave.org FSFAP statistics-1.3.0/test/caseread.dat0000755000000000000000000000001212776476211015264 0ustar 00000000000000a bcd efstatistics-1.3.0/test/tblread-space.dat0000755000000000000000000000002412776476211016226 0ustar 00000000000000 a bc de 1 2 f 3 4statistics-1.3.0/test/tblread-tab.dat0000755000000000000000000000007012776476211015702 0ustar 00000000000000 a bc de 1 2 f 3 4