NaN/0000775002356700235670000000000012546555136011756 5ustar schloeglschloeglNaN/src/0000775002356700235670000000000012546555136012545 5ustar schloeglschloeglNaN/src/svm_model_matlab.h0000664002356700235670000000367711553522126016226 0ustar schloeglschloegl/* This code was extracted from libsvm-mat-2.9-1 in Jan 2010 Copyright (c) 2000-2009 Chih-Chung Chang and Chih-Jen Lin All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither name of copyright holders nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifdef __cplusplus
extern "C" {
#endif

/*
 * Store a trained libsvm model in plhs[0] as a MATLAB/Octave structure.
 *
 * plhs            output argument array of the calling MEX function
 * num_of_feature  number of features (columns) of the training data
 * model           the model to convert
 *
 * Returns an error message on failure; callers treat a NULL (false) return
 * as success and print the returned text otherwise.
 */
const char *model_to_matlab_structure(mxArray *plhs[], int num_of_feature, struct svm_model *model);

/*
 * Rebuild a libsvm model from a structure produced by the function above.
 *
 * matlab_struct   the model structure received from MATLAB/Octave
 * error_message   receives a description of the problem when conversion fails
 *
 * Returns NULL on failure. The caller in svmpredict_mex releases a returned
 * model with svm_free_and_destroy_model().
 */
struct svm_model *matlab_matrix_to_model(const mxArray *matlab_struct, const char **error_message);

#ifdef __cplusplus
}
#endif
// This is the save but slowest option // // Output: // x = sort(X)(k) // // $Id$ // Copyright (C) 2010,2011 Alois Schloegl // This function is part of the NaN-toolbox // http://pub.ist.ac.at/~schloegl/matlab/NaN/ // //------------------------------------------------------------------- #include #include #include #include #include "mex.h" #ifdef tmwtypes_h #if (MX_API_VER<=0x07020000) typedef int mwSize; typedef int mwIndex; #endif #endif #define SWAP(a,b) {temp = a; a=b; b=temp;} static void findFirstK(double *array, size_t left, size_t right, size_t k) { while (right > left) { mwIndex pivotIndex = (left + right) / 2; /* partition */ double temp; double pivotValue = array[pivotIndex]; SWAP(array[pivotIndex], array[right]); pivotIndex = left; for (mwIndex i = left; i <= right - 1; ++i ) { // if (array[i] <= pivotValue || isnan(pivotValue)) // needed if data contains NaN's if (array[i] <= pivotValue) { SWAP(array[i], array[pivotIndex]); ++pivotIndex; } } SWAP(array[pivotIndex], array[right]); if (pivotIndex > k) right = pivotIndex - 1; else if (pivotIndex < k) left = pivotIndex + 1; else break; } } void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) { mwIndex k, n; // running indices mwSize szK, szX; double *T,*X,*Y,*K; char flag = 0; // default value // check for proper number of input and output arguments if ( PInputCount < 2 || PInputCount > 3 ) { mexPrintf("KTH_ELEMENT returns the K-th smallest element of vector X\n"); mexPrintf("\nusage:\tx = kth_element(X,k)\n"); mexPrintf("\nusage:\tx = kth_element(X,k,flag)\n"); mexPrintf("\nflag=0: the elements in X can be modified in-place, and data with NaN's is not correctly handled. This can be useful for performance reasons, but it might modify data in-place and is not save for data with NaN's. 
You are warned.\n"); mexPrintf("flag=1: prevents in-place modification of X using a local copy of the data, but does not handle data with NaN in the correct way.\n"); mexPrintf("flag=2: prevents in-place modification of X using a local copy of the data and handles NaN's correctly. This is the save but slowest option.\n"); mexPrintf("\nsee also: median, quantile\n\n"); mexErrMsgTxt("KTH_ELEMENT requires two or three input arguments\n"); } else if (PInputCount == 3) { // check value of flag mwSize N = mxGetNumberOfElements(PInputs[2]); if (N>1) mexErrMsgTxt("KTH_ELEMENT: flag argument must be scalar\n"); else if (N==1) { switch (mxGetClassID(PInputs[2])) { case mxLOGICAL_CLASS: case mxCHAR_CLASS: case mxINT8_CLASS: case mxUINT8_CLASS: flag = (char)*(uint8_t*)mxGetData(PInputs[2]); break; case mxDOUBLE_CLASS: flag = (char)*(double*)mxGetData(PInputs[2]); break; case mxSINGLE_CLASS: flag = (char)*(float*)mxGetData(PInputs[2]); break; case mxINT16_CLASS: case mxUINT16_CLASS: flag = (char)*(uint16_t*)mxGetData(PInputs[2]); break; case mxINT32_CLASS: case mxUINT32_CLASS: flag = (char)*(uint32_t*)mxGetData(PInputs[2]); break; case mxINT64_CLASS: case mxUINT64_CLASS: flag = (char)*(uint64_t*)mxGetData(PInputs[2]); break; case mxFUNCTION_CLASS: case mxUNKNOWN_CLASS: case mxCELL_CLASS: case mxSTRUCT_CLASS: default: mexErrMsgTxt("KTH_ELEMENT: Type of 3rd input argument not supported."); } } // else flag = default value } // else flag = default value if (POutputCount > 2) mexErrMsgTxt("KTH_ELEMENT has only one output arguments."); // get 1st argument if (mxIsComplex(PInputs[0]) || mxIsComplex(PInputs[1])) mexErrMsgTxt("complex argument not supported (yet). "); if (!mxIsDouble(PInputs[0]) || !mxIsDouble(PInputs[1])) mexErrMsgTxt("input arguments must be of type double . 
"); // TODO: support of complex, and integer data szK = mxGetNumberOfElements(PInputs[1]); K = (double*)mxGetData(PInputs[1]); szX = mxGetNumberOfElements(PInputs[0]); X = (double*)mxGetData(PInputs[0]); if (flag==0) T = X; else { //***** create temporary copy for avoiding unintended side effects (in-place sort of input data) */ T = (double*)mxMalloc(szX*sizeof(double)); if (flag==1) memcpy(T,X,szX*sizeof(double)); else { /* do not copy NaN's */ for (k=0,n=0; k < szX; k++) { if (!isnan(X[k])) T[n++]=X[k]; } szX = n; } } /*********** create output arguments *****************/ POutput[0] = mxCreateDoubleMatrix(mxGetM(PInputs[1]),mxGetN(PInputs[1]),mxREAL); Y = (double*) mxGetData(POutput[0]); for (k=0; k < szK; k++) { n = K[k]-1; // convert to zero-based indexing if (n >= szX || n < 0) Y[k] = 0.0/0.0; // NaN: result undefined else { findFirstK(T, 0, szX-1, n); Y[k] = T[n]; } } if (flag) mxFree(T); return; } NaN/src/Makefile0000664002356700235670000001507312546555136014213 0ustar schloeglschloegl#################################################### # Copyright 2010, 2011,2012 Alois Schloegl # This is part of the NaN-toolbox - a statistics and machine learning toolbox for data with and without missing values. # http://pub.ist.ac.at/~schloegl/matlab/NaN/ #################################################### ### modify directories according to your needs # Define non-default octave-version # Octave - global install (e.g. 
from debian package) # OCTAVE_VERSION= # Better alternative: define an OCTAVE_VERSION bash variable (or in .bashrc or .profile) # OCTAVE_VERSION=-3.6.3 # Matlab configuration #MATLABDIR = /usr/local/MATLAB/R2010b # comment the following line if you use MATLAB on 32-bit operating system MEX_OPTION += -largeArrayDims # Mingw crosscompiler: available at http://www.nongnu.org/mingw-cross-env/ CROSS = $(HOME)/src/mxe/usr/bin/i686-w64-mingw32.static- CROSS64 = $(HOME)/src/mxe/usr/bin/x86_64-w64-mingw32.static- # include directory for Win32-Matlab include W32MAT_INC = $(HOME)/bin/win32/Matlab/R2010b/extern/include/ W64MAT_INC = $(HOME)/bin/win64/Matlab/R2010b/extern/include/ # path to GNUMEX libraries, available from here http://sourceforge.net/projects/gnumex/ GNUMEX = $(HOME)/bin/win32/gnumex GNUMEX64 = $(HOME)/bin/win64/gnumex # building gnumex64 was difficult, these hints were quite useful: # http://sourceforge.net/mailarchive/forum.php?thread_name=AANLkTinZvxgC9ezp2P3UCX_a7TAUYuVsp2U40MQUV6qr%40mail.gmail.com&forum_name=gnumex-users # Instead of building "mex shortpath.c" and "mex uigetpath.c", I used empty m-functions within argout=argin; #################################################### MKOCTFILE ?= mkoctfile$(OCTAVE_VERSION) CC = gcc CXX = g++ CFLAGS = -fopenmp -Wall -Wextra -Wconversion -O2 OCTMEX = $(MKOCTFILE) --mex RM = rm ifneq ($(OS),Windows_NT) CFLAGS += -fPIC endif ifneq (Darwin,$(shell uname)) CFLAGS += -fopenmp MEX_OPTION += -lgomp endif MEX_OPTION += CC\#$(CXX) CXX\#$(CXX) CFLAGS\#"$(CFLAGS) " CXXFLAGS\#"$(CFLAGS) " MATMEX = $(MATLABDIR)/bin/mex $(MEX_OPTION) PROGS = histo_mex.mex covm_mex.mex kth_element.mex sumskipnan_mex.mex str2array.mex train.mex svmtrain_mex.mex svmpredict_mex.mex xptopen.mex ### per default only the mex-files for octave are built mex4o octave: $(PROGS) ### Matlab configuration - search for a matlab directory if not defined above ifeq (,$(MATLABDIR)) ifneq (,$(shell ls -1 /usr/local/ |grep MATLAB)) # use oldest, typically 
mex-files are compatible with newer Matlab versions MATLABDIR=/usr/local/MATLAB/$(shell ls -1rt /usr/local/MATLAB/ |grep "^R20*" |head -1) endif endif ### if MATLABDIR has been found or defined ifneq (,$(MATLABDIR)) ifneq (,$(shell ls -1 $(MATLABDIR)/bin/mexext)) MEX_EXT=$(shell $(MATLABDIR)/bin/mexext) mex4m matlab: $(patsubst %.mex, %.$(MEX_EXT), $(PROGS)) endif endif mexw32 win32: $(patsubst %.mex, %.mexw32, $(PROGS)) mexw64 win64: $(patsubst %.mex, %.mexw64, $(PROGS)) all: octave win32 win64 mex4m clean: -$(RM) *.o *.obj *.o64 core octave-core *.oct *~ *.mex* #$(PROGS): Makefile ######################################################### # Octave, MATLAB on Linux ######################################################### svm%_mex.mex: svm%_mex.cpp svm.o svm_model_octave.o env CC=$(CXX) $(OCTMEX) "$<" svm.o svm_model_octave.o svm%_mex.$(MEX_EXT): svm%_mex.cpp svm.o svm_model_matlab.o $(MATMEX) "$<" svm.o svm_model_matlab.o %.$(MEX_EXT): %.cpp $(MATMEX) "$<" svm_model_octave.o: svm_model_matlab.c env CC=$(CC) $(MKOCTFILE) -o "$@" -c "$<" svm_model_matlab.o: svm_model_matlab.c $(CC) $(CFLAGS) -I $(MATLABDIR)/extern/include -o "$@" -c "$<" svm.o: svm.cpp $(CC) $(CFLAGS) -c svm.cpp train.$(MEX_EXT) predict.$(MEX_EXT): train.c tron.o linear.o linear_model_matlab.c $(CC) $(CFLAGS) -I $(MATLABDIR)/extern/include -c linear_model_matlab.c $(MATMEX) -lblas train.c tron.o linear.o linear_model_matlab.o #$(MATMEX) -lblas predict.c tron.o linear.o linear_model_matlab.o train.mex predict.mex: train.c tron.o linear.o linear_model_matlab.c env CC=$(CXX) $(OCTMEX) -lblas train.c tron.o linear.o linear_model_matlab.c linear.o: linear.cpp $(CXX) $(CFLAGS) -c linear.cpp tron.o: tron.cpp tron.h $(CXX) $(CFLAGS) -c tron.cpp %.oct: %.cc mkoctfile$(OCTAVE_VERSION) "$<" %.mex: %.cpp $(OCTMEX) "$<" ######################################################### # MATLAB/WIN32 ######################################################### %.obj: %.cpp $(CROSS)$(CXX) -fopenmp -c -DMATLAB_MEX_FILE -x c++ 
-o "$@" -I$(W32MAT_INC) -O2 -DMX_COMPAT_32 "$<" %.obj: %.c $(CROSS)$(CXX) -fopenmp -c -DMATLAB_MEX_FILE -x c++ -o "$@" -I$(W32MAT_INC) -O2 -DMX_COMPAT_32 "$<" train.mexw32 predict.mexw32: train.obj linear.obj linear_model_matlab.obj tron.obj $(CROSS)$(CXX) -shared $(GNUMEX)/mex.def -o "$@" -L$(GNUMEX) -s "$<" linear_model_matlab.obj linear.obj tron.obj -llibmx -llibmex -llibmat -lcholmod -lblas svmpredict_mex.mexw32 : svmpredict_mex.obj svm.obj svm_model_matlab.obj $(CROSS)$(CXX) -shared $(GNUMEX)/mex.def -o "$@" -L$(GNUMEX) -s "$<" svm_model_matlab.obj svm.obj -llibmx -llibmex -llibmat -lcholmod svmtrain_mex.mexw32 : svmtrain_mex.obj svm.obj svm_model_matlab.obj $(CROSS)$(CXX) -shared $(GNUMEX)/mex.def -o "$@" -L$(GNUMEX) -s "$<" svm_model_matlab.obj svm.obj -llibmx -llibmex -llibmat -lcholmod %.mexw32: %.obj $(CROSS)$(CXX) -shared $(GNUMEX)/mex.def -o "$@" -L$(GNUMEX) -s "$<" -llibmx -llibmex -llibmat -lcholmod -lgomp -lpthread ######################################################### # MATLAB/WIN64 ######################################################### ## ToDO: fix OpenMP support: currently -fopenmp causes Matlab to crash %.o64: %.cpp $(CROSS64)$(CXX) -c -DMATLAB_MEX_FILE -x c++ -o "$@" -I$(W64MAT_INC) -O2 "$<" %.o64: %.c $(CROSS64)$(CXX) -c -DMATLAB_MEX_FILE -x c++ -o "$@" -I$(W64MAT_INC) -O2 "$<" train.mexw64 predict.mexw64: train.o64 linear.o64 linear_model_matlab.o64 tron.o64 $(CROSS64)$(CXX) -shared $(GNUMEX64)/mex.def -o "$@" -L$(GNUMEX64) -s "$<" linear_model_matlab.o64 linear.o64 tron.o64 -llibmx -llibmex -llibmat -lcholmod -lblas svmpredict_mex.mexw64 : svmpredict_mex.o64 svm.o64 svm_model_matlab.o64 $(CROSS64)$(CXX) -shared $(GNUMEX64)/mex.def -o "$@" -L$(GNUMEX64) -s "$<" svm_model_matlab.o64 svm.o64 -llibmx -llibmex -llibmat -lcholmod svmtrain_mex.mexw64 : svmtrain_mex.o64 svm.o64 svm_model_matlab.o64 $(CROSS64)$(CXX) -shared $(GNUMEX64)/mex.def -o "$@" -L$(GNUMEX64) -s "$<" svm_model_matlab.o64 svm.o64 -llibmx -llibmex -llibmat -lcholmod 
%.mexw64: %.o64 $(CROSS64)$(CXX) -shared $(GNUMEX64)/mex.def -o "$@" -L$(GNUMEX64) -s "$<" -llibmx -llibmex -llibmat -lcholmod -lgomp -lpthread NaN/src/svmpredict_mex.cpp0000664002356700235670000002414012516156517016300 0ustar schloeglschloegl/* $Id: svmpredict_mex.cpp 12791 2015-04-23 11:53:51Z schloegl $ Copyright (c) 2000-2012 Chih-Chung Chang and Chih-Jen Lin Copyright (c) 2010,2011,2015 Alois Schloegl This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ This code was extracted from libsvm-3.12 in Apr 2015 and modified for the use with Octave This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . 
*/ #include #include #include #include "svm.h" #include "mex.h" #include "svm_model_matlab.h" #ifdef tmwtypes_h #if (MX_API_VER<=0x07020000) typedef int mwSize; typedef int mwIndex; #endif #endif #define CMD_LEN 2048 void read_sparse_instance(const mxArray *prhs, int index, struct svm_node *x) { int i, j, low, high; mwIndex *ir, *jc; double *samples; ir = mxGetIr(prhs); jc = mxGetJc(prhs); samples = mxGetPr(prhs); // each column is one instance j = 0; low = (int)jc[index], high = (int)jc[index+1]; for(i=low;iparam.kernel_type == PRECOMPUTED) { // precomputed kernel requires dense matrix, so we make one mxArray *rhs[1], *lhs[1]; rhs[0] = mxDuplicateArray(prhs[1]); if(mexCallMATLAB(1, lhs, 1, rhs, "full")) { mexPrintf("Error: cannot full testing instance matrix\n"); fake_answer(plhs); return; } ptr_instance = mxGetPr(lhs[0]); mxDestroyArray(rhs[0]); } else { mxArray *pprhs[1]; pprhs[0] = mxDuplicateArray(prhs[1]); if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose")) { mexPrintf("Error: cannot transpose testing instance matrix\n"); fake_answer(plhs); return; } } } if(predict_probability) { if(svm_type==NU_SVR || svm_type==EPSILON_SVR) mexPrintf("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model)); else prob_estimates = (double *) malloc(nr_class*sizeof(double)); } plhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL); if(predict_probability) { // prob estimates are in plhs[2] if(svm_type==C_SVC || svm_type==NU_SVC) plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL); else plhs[2] = mxCreateDoubleMatrix(0, 0, mxREAL); } else { // decision values are in plhs[2] if(svm_type == ONE_CLASS || svm_type == EPSILON_SVR || svm_type == NU_SVR || nr_class == 1) // if only one class in training data, decision values are still returned. 
plhs[2] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL); else plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class*(nr_class-1)/2, mxREAL); } ptr_predict_label = mxGetPr(plhs[0]); ptr_prob_estimates = mxGetPr(plhs[2]); ptr_dec_values = mxGetPr(plhs[2]); x = (struct svm_node*)malloc((feature_number+1)*sizeof(struct svm_node) ); for(instance_index=0;instance_indexparam.kernel_type != PRECOMPUTED) // prhs[1]^T is still sparse read_sparse_instance(pplhs[0], instance_index, x); else { for(i=0;i 4 || nrhs < 3) { exit_with_help(); fake_answer(plhs); return; } if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) { mexPrintf("Error: label vector and instance matrix must be double\n"); fake_answer(plhs); return; } if(mxIsStruct(prhs[2])) { const char *error_msg; // parse options if(nrhs==4) { int i, argc = 1; char cmd[CMD_LEN], *argv[CMD_LEN/2]; // put options in argv[] mxGetString(prhs[3], cmd, mxGetN(prhs[3]) + 1); if((argv[argc] = strtok(cmd, " ")) != NULL) while((argv[++argc] = strtok(NULL, " ")) != NULL) ; for(i=1;i=argc) { exit_with_help(); fake_answer(plhs); return; } switch(argv[i-1][1]) { case 'b': prob_estimate_flag = atoi(argv[i]); break; default: mexPrintf("Unknown option: -%c\n", argv[i-1][1]); exit_with_help(); fake_answer(plhs); return; } } } model = matlab_matrix_to_model(prhs[2], &error_msg); if (model == NULL) { mexPrintf("Error: can't read model: %s\n", error_msg); fake_answer(plhs); return; } if(prob_estimate_flag) { if(svm_check_probability_model(model)==0) { mexPrintf("Model does not support probabiliy estimates\n"); fake_answer(plhs); svm_free_and_destroy_model(&model); return; } } else { if(svm_check_probability_model(model)!=0) mexPrintf("Model supports probability estimates, but disabled in predicton.\n"); } predict(plhs, prhs, model, prob_estimate_flag); // destroy model svm_free_and_destroy_model(&model); } else { mexPrintf("model file should be a struct array\n"); fake_answer(plhs); } return; } 
NaN/src/svmtrain_mex.cpp0000664002356700235670000003014612512501472015754 0ustar schloeglschloegl/* $Id: svmtrain_mex.cpp 12775 2015-04-12 14:37:46Z schloegl $ Copyright (c) 2000-2012 Chih-Chung Chang and Chih-Jen Lin Copyright (c) 2010,2015 Alois Schloegl This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ This code was extracted from libsvm-3.12 in Apr 2015 and modified for the use with Octave This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . 
*/ #include #include #include #include #include "svm.h" #include "mex.h" #include "svm_model_matlab.h" #ifdef tmwtypes_h #if (MX_API_VER<=0x07020000) typedef int mwSize; typedef int mwIndex; #endif #endif #define CMD_LEN 2048 #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) void print_null(const char *s) {} void print_string_matlab(const char *s) {mexPrintf(s);} void exit_with_help() { mexPrintf( "Usage: model = svmtrain_mex(training_label_vector, training_instance_matrix, 'libsvm_options');\n" "libsvm_options:\n" "-s svm_type : set type of SVM (default 0)\n" " 0 -- C-SVC\n" " 1 -- nu-SVC\n" " 2 -- one-class SVM\n" " 3 -- epsilon-SVR\n" " 4 -- nu-SVR\n" "-t kernel_type : set type of kernel function (default 2)\n" " 0 -- linear: u'*v\n" " 1 -- polynomial: (gamma*u'*v + coef0)^degree\n" " 2 -- radial basis function: exp(-gamma*|u-v|^2)\n" " 3 -- sigmoid: tanh(gamma*u'*v + coef0)\n" " 4 -- precomputed kernel (kernel values in training_instance_matrix)\n" "-d degree : set degree in kernel function (default 3)\n" "-g gamma : set gamma in kernel function (default 1/num_features)\n" "-r coef0 : set coef0 in kernel function (default 0)\n" "-c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)\n" "-n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)\n" "-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n" "-m cachesize : set cache memory size in MB (default 100)\n" "-e epsilon : set tolerance of termination criterion (default 0.001)\n" "-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n" "-b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)\n" "-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)\n" "-v n : n-fold cross validation mode\n" "-q : quiet mode (no outputs)\n" ); } // svm arguments struct svm_parameter param; // set by parse_command_line struct svm_problem prob; // set by 
read_problem struct svm_model *model; struct svm_node *x_space; int cross_validation; int nr_fold; double do_cross_validation() { int i; int total_correct = 0; double total_error = 0; double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0; double *target = Malloc(double,prob.l); double retval = 0.0; svm_cross_validation(&prob,¶m,nr_fold,target); if(param.svm_type == EPSILON_SVR || param.svm_type == NU_SVR) { for(i=0;i 2) { // put options in argv[] mxGetString(prhs[2], cmd, mxGetN(prhs[2]) + 1); if((argv[argc] = strtok(cmd, " ")) != NULL) while((argv[++argc] = strtok(NULL, " ")) != NULL) ; } // parse options for(i=1;i=argc && argv[i-1][1] != 'q') // since option -q has no parameter return 1; switch(argv[i-1][1]) { case 's': param.svm_type = atoi(argv[i]); break; case 't': param.kernel_type = atoi(argv[i]); break; case 'd': param.degree = atoi(argv[i]); break; case 'g': param.gamma = atof(argv[i]); break; case 'r': param.coef0 = atof(argv[i]); break; case 'n': param.nu = atof(argv[i]); break; case 'm': param.cache_size = atof(argv[i]); break; case 'c': param.C = atof(argv[i]); break; case 'e': param.eps = atof(argv[i]); break; case 'p': param.p = atof(argv[i]); break; case 'h': param.shrinking = atoi(argv[i]); break; case 'b': param.probability = atoi(argv[i]); break; case 'q': print_func = &print_null; i--; break; case 'v': cross_validation = 1; nr_fold = atoi(argv[i]); if(nr_fold < 2) { mexPrintf("n-fold cross validation: n must >= 2\n"); return 1; } break; case 'w': ++param.nr_weight; param.weight_label = (int *)realloc(param.weight_label,sizeof(int)*param.nr_weight); param.weight = (double *)realloc(param.weight,sizeof(double)*param.nr_weight); param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]); param.weight[param.nr_weight-1] = atof(argv[i]); break; default: mexPrintf("Unknown option -%c\n", argv[i-1][1]); return 1; } } svm_set_print_string_function(print_func); return 0; } // read in a problem (in svmlight format) int read_problem_dense(const 
mxArray *label_vec, const mxArray *instance_mat) { int i, j, k; int elements, max_index, sc, label_vector_row_num; double *samples, *labels; prob.x = NULL; prob.y = NULL; x_space = NULL; labels = mxGetPr(label_vec); samples = mxGetPr(instance_mat); sc = (int)mxGetN(instance_mat); elements = 0; // the number of instance prob.l = (int)mxGetM(instance_mat); label_vector_row_num = (int)mxGetM(label_vec); if(label_vector_row_num!=prob.l) { mexPrintf("Length of label vector does not match # of instances.\n"); return -1; } if(param.kernel_type == PRECOMPUTED) elements = prob.l * (sc + 1); else { for(i = 0; i < prob.l; i++) { for(k = 0; k < sc; k++) if(samples[k * prob.l + i] != 0) elements++; // count the '-1' element elements++; } } prob.y = Malloc(double,prob.l); prob.x = Malloc(struct svm_node *,prob.l); x_space = Malloc(struct svm_node, elements); max_index = sc; j = 0; for(i = 0; i < prob.l; i++) { prob.x[i] = &x_space[j]; prob.y[i] = labels[i]; for(k = 0; k < sc; k++) { if(param.kernel_type == PRECOMPUTED || samples[k * prob.l + i] != 0) { x_space[j].index = k + 1; x_space[j].value = samples[k * prob.l + i]; j++; } } x_space[j++].index = -1; } if(param.gamma == 0 && max_index > 0) param.gamma = 1.0/max_index; if(param.kernel_type == PRECOMPUTED) for(i=0;i max_index) { mexPrintf("Wrong input format: sample_serial_number out of range\n"); return -1; } } return 0; } int read_problem_sparse(const mxArray *label_vec, const mxArray *instance_mat) { int i, j, k, low, high; mwIndex *ir, *jc; int elements, max_index, num_samples, label_vector_row_num; double *samples, *labels; mxArray *instance_mat_col; // transposed instance sparse matrix prob.x = NULL; prob.y = NULL; x_space = NULL; // transpose instance matrix { mxArray *prhs[1], *plhs[1]; prhs[0] = mxDuplicateArray(instance_mat); if(mexCallMATLAB(1, plhs, 1, prhs, "transpose")) { mexPrintf("Error: cannot transpose training instance matrix\n"); return -1; } instance_mat_col = plhs[0]; mxDestroyArray(prhs[0]); } // each 
column is one instance labels = mxGetPr(label_vec); samples = mxGetPr(instance_mat_col); ir = mxGetIr(instance_mat_col); jc = mxGetJc(instance_mat_col); num_samples = (int)mxGetNzmax(instance_mat_col); // the number of instance prob.l = (int)mxGetN(instance_mat_col); label_vector_row_num = (int)mxGetM(label_vec); if(label_vector_row_num!=prob.l) { mexPrintf("Length of label vector does not match # of instances.\n"); return -1; } elements = num_samples + prob.l; max_index = (int)mxGetM(instance_mat_col); prob.y = Malloc(double,prob.l); prob.x = Malloc(struct svm_node *,prob.l); x_space = Malloc(struct svm_node, elements); j = 0; for(i=0;i 0) param.gamma = 1.0/max_index; return 0; } static void fake_answer(mxArray *plhs[]) { plhs[0] = mxCreateDoubleMatrix(0, 0, mxREAL); } // Interface function of matlab // now assume prhs[0]: label prhs[1]: features void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] ) { const char *error_msg; // fix random seed to have same results for each run // (for cross validation and probability estimation) srand(1); // Transform the input Matrix to libsvm format if(nrhs > 1 && nrhs < 4) { int err; if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) { mexPrintf("Error: label vector and instance matrix must be double\n"); fake_answer(plhs); return; } if(parse_command_line(nrhs, prhs, NULL)) { exit_with_help(); svm_destroy_param(¶m); fake_answer(plhs); return; } if(mxIsSparse(prhs[1])) { if(param.kernel_type == PRECOMPUTED) { // precomputed kernel requires dense matrix, so we make one mxArray *rhs[1], *lhs[1]; rhs[0] = mxDuplicateArray(prhs[1]); if(mexCallMATLAB(1, lhs, 1, rhs, "full")) { mexPrintf("Error: cannot generate a full training instance matrix\n"); svm_destroy_param(¶m); fake_answer(plhs); return; } err = read_problem_dense(prhs[0], lhs[0]); mxDestroyArray(lhs[0]); mxDestroyArray(rhs[0]); } else err = read_problem_sparse(prhs[0], prhs[1]); } else err = read_problem_dense(prhs[0], prhs[1]); // svmtrain's original 
code error_msg = svm_check_parameter(&prob, ¶m); if(err || error_msg) { if (error_msg != NULL) mexPrintf("Error: %s\n", error_msg); svm_destroy_param(¶m); free(prob.y); free(prob.x); free(x_space); fake_answer(plhs); return; } if(cross_validation) { double *ptr; plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL); ptr = mxGetPr(plhs[0]); ptr[0] = do_cross_validation(); } else { int nr_feat = (int)mxGetN(prhs[1]); const char *error_msg; model = svm_train(&prob, ¶m); error_msg = model_to_matlab_structure(plhs, nr_feat, model); if(error_msg) mexPrintf("Error: can't convert libsvm model to matrix structure: %s\n", error_msg); svm_free_and_destroy_model(&model); } svm_destroy_param(¶m); free(prob.y); free(prob.x); free(x_space); } else { exit_with_help(); fake_answer(plhs); return; } } NaN/src/svm.cpp0000664002356700235670000017552412512501472014057 0ustar schloeglschloegl/* Copyright (c) 2000-2012 Chih-Chung Chang and Chih-Jen Lin Copyright (c) 2010,2011,2015 Alois Schloegl This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ This code was extracted from libsvm-3.12 in Apr 2015 and modified for the use with Octave This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . 
// Integer power by repeated squaring: returns base raised to `times`.
// The loop body never runs for times <= 0, so the result is 1.0 then.
static inline double powi(double base, int times)
{
	double result = 1.0;
	double square = base;	// base^(2^step) for the current step
	int remaining = times;

	while (remaining > 0) {
		if (remaining & 1)	// lowest exponent bit set: this power contributes
			result *= square;
		square *= square;
		remaining >>= 1;
	}
	return result;
}
// Make sure the cache entry `index` has room for `len` values and hand its
// buffer back through *data.
//
// Returns the position p from which the caller has to fill the buffer:
// the previously cached length when the entry had to grow, or `len` itself
// when the entry already held at least `len` values (nothing to fill).
// The entry is moved to the most-recently-used end of the LRU list.
int Cache::get_data(const int index, Qfloat **data, int len)
{
	head_t *h = &head[index];
	// an entry with cached data is currently linked in the LRU list;
	// unlink it here, it is re-inserted at the end below
	if(h->len) lru_delete(h);
	int more = len - h->len;	// number of additional values needed

	if(more > 0)
	{
		// free old space
		// evict entries from the front of the LRU list (least recently
		// used) until the remaining budget `size` covers the growth
		while(size < more)
		{
			head_t *old = lru_head.next;
			lru_delete(old);
			free(old->data);
			size += old->len;
			old->data = 0;
			old->len = 0;
		}

		// allocate new space
		// NOTE(review): the realloc result is not checked for NULL
		h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
		size -= more;
		// after the swap h->len holds the new length and `len` the old
		// one, which is exactly the fill position returned below
		swap(h->len,len);
	}

	lru_insert(h);
	*data = h->data;
	return len;
}
lru_insert(&head[i]); if(head[j].len) lru_insert(&head[j]); if(i>j) swap(i,j); for(head_t *h = lru_head.next; h!=&lru_head; h=h->next) { if(h->len > i) { if(h->len > j) swap(h->data[i],h->data[j]); else { // give up lru_delete(h); free(h->data); size += h->len; h->data = 0; h->len = 0; } } } } // // Kernel evaluation // // the static method k_function is for doing single kernel evaluation // the constructor of Kernel prepares to calculate the l*l kernel matrix // the member function get_Q is for getting one column from the Q Matrix // class QMatrix { public: virtual Qfloat *get_Q(int column, int len) const = 0; virtual double *get_QD() const = 0; virtual void swap_index(int i, int j) const = 0; virtual ~QMatrix() {} }; class Kernel: public QMatrix { public: Kernel(int l, svm_node * const * x, const svm_parameter& param); virtual ~Kernel(); static double k_function(const svm_node *x, const svm_node *y, const svm_parameter& param); virtual Qfloat *get_Q(int column, int len) const = 0; virtual double *get_QD() const = 0; virtual void swap_index(int i, int j) const // no so const... 
{ swap(x[i],x[j]); if(x_square) swap(x_square[i],x_square[j]); } protected: double (Kernel::*kernel_function)(int i, int j) const; private: const svm_node **x; double *x_square; // svm_parameter const int kernel_type; const int degree; const double gamma; const double coef0; static double dot(const svm_node *px, const svm_node *py); double kernel_linear(int i, int j) const { return dot(x[i],x[j]); } double kernel_poly(int i, int j) const { return powi(gamma*dot(x[i],x[j])+coef0,degree); } double kernel_rbf(int i, int j) const { return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j]))); } double kernel_sigmoid(int i, int j) const { return tanh(gamma*dot(x[i],x[j])+coef0); } double kernel_precomputed(int i, int j) const { return x[i][(int)(x[j][0].value)].value; } }; Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param) :kernel_type(param.kernel_type), degree(param.degree), gamma(param.gamma), coef0(param.coef0) { switch(kernel_type) { case LINEAR: kernel_function = &Kernel::kernel_linear; break; case POLY: kernel_function = &Kernel::kernel_poly; break; case RBF: kernel_function = &Kernel::kernel_rbf; break; case SIGMOID: kernel_function = &Kernel::kernel_sigmoid; break; case PRECOMPUTED: kernel_function = &Kernel::kernel_precomputed; break; } clone(x,x_,l); if(kernel_type == RBF) { x_square = new double[l]; for(int i=0;iindex != -1 && py->index != -1) { if(px->index == py->index) { sum += px->value * py->value; ++px; ++py; } else { if(px->index > py->index) ++py; else ++px; } } return sum; } double Kernel::k_function(const svm_node *x, const svm_node *y, const svm_parameter& param) { switch(param.kernel_type) { case LINEAR: return dot(x,y); case POLY: return powi(param.gamma*dot(x,y)+param.coef0,param.degree); case RBF: { double sum = 0; while(x->index != -1 && y->index !=-1) { if(x->index == y->index) { double d = x->value - y->value; sum += d*d; ++x; ++y; } else { if(x->index > y->index) { sum += y->value * y->value; ++y; } else { sum += 
x->value * x->value; ++x; } } } while(x->index != -1) { sum += x->value * x->value; ++x; } while(y->index != -1) { sum += y->value * y->value; ++y; } return exp(-param.gamma*sum); } case SIGMOID: return tanh(param.gamma*dot(x,y)+param.coef0); case PRECOMPUTED: //x: test (validation), y: SV return x[(int)(y->value)].value; default: return 0; // Unreachable } } // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918 // Solves: // // min 0.5(\alpha^T Q \alpha) + p^T \alpha // // y^T \alpha = \delta // y_i = +1 or -1 // 0 <= alpha_i <= Cp for y_i = 1 // 0 <= alpha_i <= Cn for y_i = -1 // // Given: // // Q, p, y, Cp, Cn, and an initial feasible point \alpha // l is the size of vectors and matrices // eps is the stopping tolerance // // solution will be put in \alpha, objective value will be put in obj // class Solver { public: Solver() {}; virtual ~Solver() {}; struct SolutionInfo { double obj; double rho; double upper_bound_p; double upper_bound_n; double r; // for Solver_NU }; void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_, double *alpha_, double Cp, double Cn, double eps, SolutionInfo* si, int shrinking); protected: int active_size; schar *y; double *G; // gradient of objective function enum { LOWER_BOUND, UPPER_BOUND, FREE }; char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE double *alpha; const QMatrix *Q; const double *QD; double eps; double Cp,Cn; double *p; int *active_set; double *G_bar; // gradient, if we treat free variables as 0 int l; bool unshrink; // XXX double get_C(int i) { return (y[i] > 0)? 
Cp : Cn; } void update_alpha_status(int i) { if(alpha[i] >= get_C(i)) alpha_status[i] = UPPER_BOUND; else if(alpha[i] <= 0) alpha_status[i] = LOWER_BOUND; else alpha_status[i] = FREE; } bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; } bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; } bool is_free(int i) { return alpha_status[i] == FREE; } void swap_index(int i, int j); void reconstruct_gradient(); virtual int select_working_set(int &i, int &j); virtual double calculate_rho(); virtual void do_shrinking(); private: bool be_shrunk(int i, double Gmax1, double Gmax2); }; void Solver::swap_index(int i, int j) { Q->swap_index(i,j); swap(y[i],y[j]); swap(G[i],G[j]); swap(alpha_status[i],alpha_status[j]); swap(alpha[i],alpha[j]); swap(p[i],p[j]); swap(active_set[i],active_set[j]); swap(G_bar[i],G_bar[j]); } void Solver::reconstruct_gradient() { // reconstruct inactive elements of G from G_bar and free variables if(active_size == l) return; int i,j; int nr_free = 0; for(j=active_size;j 2*active_size*(l-active_size)) { for(i=active_size;iget_Q(i,active_size); for(j=0;jget_Q(i,l); double alpha_i = alpha[i]; for(j=active_size;jl = l; this->Q = &Q; QD=Q.get_QD(); clone(p, p_,l); clone(y, y_,l); clone(alpha,alpha_,l); this->Cp = Cp; this->Cn = Cn; this->eps = eps; unshrink = false; // initialize alpha_status { alpha_status = new char[l]; for(int i=0;iINT_MAX/100 ? 
INT_MAX : 100*l); int counter = min(l,1000)+1; while(iter < max_iter) { // show progress and do shrinking if(--counter == 0) { counter = min(l,1000); if(shrinking) do_shrinking(); info("."); } int i,j; if(select_working_set(i,j)!=0) { // reconstruct the whole gradient reconstruct_gradient(); // reset active set size and check active_size = l; info("*"); if(select_working_set(i,j)!=0) break; else counter = 1; // do shrinking next iteration } ++iter; // update alpha[i] and alpha[j], handle bounds carefully const Qfloat *Q_i = Q.get_Q(i,active_size); const Qfloat *Q_j = Q.get_Q(j,active_size); double C_i = get_C(i); double C_j = get_C(j); double old_alpha_i = alpha[i]; double old_alpha_j = alpha[j]; if(y[i]!=y[j]) { double quad_coef = QD[i]+QD[j]+2*Q_i[j]; if (quad_coef <= 0) quad_coef = TAU; double delta = (-G[i]-G[j])/quad_coef; double diff = alpha[i] - alpha[j]; alpha[i] += delta; alpha[j] += delta; if(diff > 0) { if(alpha[j] < 0) { alpha[j] = 0; alpha[i] = diff; } } else { if(alpha[i] < 0) { alpha[i] = 0; alpha[j] = -diff; } } if(diff > C_i - C_j) { if(alpha[i] > C_i) { alpha[i] = C_i; alpha[j] = C_i - diff; } } else { if(alpha[j] > C_j) { alpha[j] = C_j; alpha[i] = C_j + diff; } } } else { double quad_coef = QD[i]+QD[j]-2*Q_i[j]; if (quad_coef <= 0) quad_coef = TAU; double delta = (G[i]-G[j])/quad_coef; double sum = alpha[i] + alpha[j]; alpha[i] -= delta; alpha[j] += delta; if(sum > C_i) { if(alpha[i] > C_i) { alpha[i] = C_i; alpha[j] = sum - C_i; } } else { if(alpha[j] < 0) { alpha[j] = 0; alpha[i] = sum; } } if(sum > C_j) { if(alpha[j] > C_j) { alpha[j] = C_j; alpha[i] = sum - C_j; } } else { if(alpha[i] < 0) { alpha[i] = 0; alpha[j] = sum; } } } // update G double delta_alpha_i = alpha[i] - old_alpha_i; double delta_alpha_j = alpha[j] - old_alpha_j; for(int k=0;k= max_iter) { if(active_size < l) { // reconstruct the whole gradient to calculate objective value reconstruct_gradient(); active_size = l; info("*"); } info("\nWARNING: reaching max number of 
iterations"); } // calculate rho si->rho = calculate_rho(); // calculate objective value { double v = 0; int i; for(i=0;iobj = v/2; } // put back the solution { for(int i=0;iupper_bound_p = Cp; si->upper_bound_n = Cn; info("\noptimization finished, #iter = %d\n",iter); delete[] p; delete[] y; delete[] alpha; delete[] alpha_status; delete[] active_set; delete[] G; delete[] G_bar; } // return 1 if already optimal, return 0 otherwise int Solver::select_working_set(int &out_i, int &out_j) { // return i,j such that // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) // j: minimizes the decrease of obj value // (if quadratic coefficeint <= 0, replace it with tau) // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) double Gmax = -INF; double Gmax2 = -INF; int Gmax_idx = -1; int Gmin_idx = -1; double obj_diff_min = INF; for(int t=0;t= Gmax) { Gmax = -G[t]; Gmax_idx = t; } } else { if(!is_lower_bound(t)) if(G[t] >= Gmax) { Gmax = G[t]; Gmax_idx = t; } } int i = Gmax_idx; const Qfloat *Q_i = NULL; if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1 Q_i = Q->get_Q(i,active_size); for(int j=0;j= Gmax2) Gmax2 = G[j]; if (grad_diff > 0) { double obj_diff; double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j]; if (quad_coef > 0) obj_diff = -(grad_diff*grad_diff)/quad_coef; else obj_diff = -(grad_diff*grad_diff)/TAU; if (obj_diff <= obj_diff_min) { Gmin_idx=j; obj_diff_min = obj_diff; } } } } else { if (!is_upper_bound(j)) { double grad_diff= Gmax-G[j]; if (-G[j] >= Gmax2) Gmax2 = -G[j]; if (grad_diff > 0) { double obj_diff; double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j]; if (quad_coef > 0) obj_diff = -(grad_diff*grad_diff)/quad_coef; else obj_diff = -(grad_diff*grad_diff)/TAU; if (obj_diff <= obj_diff_min) { Gmin_idx=j; obj_diff_min = obj_diff; } } } } } if(Gmax+Gmax2 < eps) return 1; out_i = Gmax_idx; out_j = Gmin_idx; return 0; } bool Solver::be_shrunk(int i, double Gmax1, double Gmax2) { if(is_upper_bound(i)) { if(y[i]==+1) return(-G[i] > Gmax1); else return(-G[i] > Gmax2); } 
else if(is_lower_bound(i)) { if(y[i]==+1) return(G[i] > Gmax2); else return(G[i] > Gmax1); } else return(false); } void Solver::do_shrinking() { int i; double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) } double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) } // find maximal violating pair first for(i=0;i= Gmax1) Gmax1 = -G[i]; } if(!is_lower_bound(i)) { if(G[i] >= Gmax2) Gmax2 = G[i]; } } else { if(!is_upper_bound(i)) { if(-G[i] >= Gmax2) Gmax2 = -G[i]; } if(!is_lower_bound(i)) { if(G[i] >= Gmax1) Gmax1 = G[i]; } } } if(unshrink == false && Gmax1 + Gmax2 <= eps*10) { unshrink = true; reconstruct_gradient(); active_size = l; info("*"); } for(i=0;i i) { if (!be_shrunk(active_size, Gmax1, Gmax2)) { swap_index(i,active_size); break; } active_size--; } } } double Solver::calculate_rho() { double r; int nr_free = 0; double ub = INF, lb = -INF, sum_free = 0; for(int i=0;i0) r = sum_free/nr_free; else r = (ub+lb)/2; return r; } // // Solver for nu-svm classification and regression // // additional constraint: e^T \alpha = constant // class Solver_NU : public Solver { public: Solver_NU() {} void Solve(int l, const QMatrix& Q, const double *p, const schar *y, double *alpha, double Cp, double Cn, double eps, SolutionInfo* si, int shrinking) { this->si = si; Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking); } private: SolutionInfo *si; int select_working_set(int &i, int &j); double calculate_rho(); bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4); void do_shrinking(); }; // return 1 if already optimal, return 0 otherwise int Solver_NU::select_working_set(int &out_i, int &out_j) { // return i,j such that y_i = y_j and // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) // j: minimizes the decrease of obj value // (if quadratic coefficeint <= 0, replace it with tau) // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) double Gmaxp = -INF; double Gmaxp2 = -INF; int Gmaxp_idx = -1; double Gmaxn = -INF; double Gmaxn2 
= -INF; int Gmaxn_idx = -1; int Gmin_idx = -1; double obj_diff_min = INF; for(int t=0;t= Gmaxp) { Gmaxp = -G[t]; Gmaxp_idx = t; } } else { if(!is_lower_bound(t)) if(G[t] >= Gmaxn) { Gmaxn = G[t]; Gmaxn_idx = t; } } int ip = Gmaxp_idx; int in = Gmaxn_idx; const Qfloat *Q_ip = NULL; const Qfloat *Q_in = NULL; if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1 Q_ip = Q->get_Q(ip,active_size); if(in != -1) Q_in = Q->get_Q(in,active_size); for(int j=0;j= Gmaxp2) Gmaxp2 = G[j]; if (grad_diff > 0) { double obj_diff; double quad_coef = QD[ip]+QD[j]-2*Q_ip[j]; if (quad_coef > 0) obj_diff = -(grad_diff*grad_diff)/quad_coef; else obj_diff = -(grad_diff*grad_diff)/TAU; if (obj_diff <= obj_diff_min) { Gmin_idx=j; obj_diff_min = obj_diff; } } } } else { if (!is_upper_bound(j)) { double grad_diff=Gmaxn-G[j]; if (-G[j] >= Gmaxn2) Gmaxn2 = -G[j]; if (grad_diff > 0) { double obj_diff; double quad_coef = QD[in]+QD[j]-2*Q_in[j]; if (quad_coef > 0) obj_diff = -(grad_diff*grad_diff)/quad_coef; else obj_diff = -(grad_diff*grad_diff)/TAU; if (obj_diff <= obj_diff_min) { Gmin_idx=j; obj_diff_min = obj_diff; } } } } } if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps) return 1; if (y[Gmin_idx] == +1) out_i = Gmaxp_idx; else out_i = Gmaxn_idx; out_j = Gmin_idx; return 0; } bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4) { if(is_upper_bound(i)) { if(y[i]==+1) return(-G[i] > Gmax1); else return(-G[i] > Gmax4); } else if(is_lower_bound(i)) { if(y[i]==+1) return(G[i] > Gmax2); else return(G[i] > Gmax3); } else return(false); } void Solver_NU::do_shrinking() { double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) } double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) } double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) } double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) } // find maximal violating pair first int i; for(i=0;i Gmax1) Gmax1 = -G[i]; } else 
if(-G[i] > Gmax4) Gmax4 = -G[i]; } if(!is_lower_bound(i)) { if(y[i]==+1) { if(G[i] > Gmax2) Gmax2 = G[i]; } else if(G[i] > Gmax3) Gmax3 = G[i]; } } if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) { unshrink = true; reconstruct_gradient(); active_size = l; } for(i=0;i i) { if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4)) { swap_index(i,active_size); break; } active_size--; } } } double Solver_NU::calculate_rho() { int nr_free1 = 0,nr_free2 = 0; double ub1 = INF, ub2 = INF; double lb1 = -INF, lb2 = -INF; double sum_free1 = 0, sum_free2 = 0; for(int i=0;i 0) r1 = sum_free1/nr_free1; else r1 = (ub1+lb1)/2; if(nr_free2 > 0) r2 = sum_free2/nr_free2; else r2 = (ub2+lb2)/2; si->r = (r1+r2)/2; return (r1-r2)/2; } // // Q matrices for various formulations // class SVC_Q: public Kernel { public: SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_) :Kernel(prob.l, prob.x, param) { clone(y,y_,prob.l); cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20))); QD = new double[prob.l]; for(int i=0;i*kernel_function)(i,i); } Qfloat *get_Q(int i, int len) const { Qfloat *data; int start, j; if((start = cache->get_data(i,&data,len)) < len) { for(j=start;j*kernel_function)(i,j)); } return data; } double *get_QD() const { return QD; } void swap_index(int i, int j) const { cache->swap_index(i,j); Kernel::swap_index(i,j); swap(y[i],y[j]); swap(QD[i],QD[j]); } ~SVC_Q() { delete[] y; delete cache; delete[] QD; } private: schar *y; Cache *cache; double *QD; }; class ONE_CLASS_Q: public Kernel { public: ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param) :Kernel(prob.l, prob.x, param) { cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20))); QD = new double[prob.l]; for(int i=0;i*kernel_function)(i,i); } Qfloat *get_Q(int i, int len) const { Qfloat *data; int start, j; if((start = cache->get_data(i,&data,len)) < len) { for(j=start;j*kernel_function)(i,j); } return data; } double *get_QD() const { return QD; } void 
swap_index(int i, int j) const { cache->swap_index(i,j); Kernel::swap_index(i,j); swap(QD[i],QD[j]); } ~ONE_CLASS_Q() { delete cache; delete[] QD; } private: Cache *cache; double *QD; }; class SVR_Q: public Kernel { public: SVR_Q(const svm_problem& prob, const svm_parameter& param) :Kernel(prob.l, prob.x, param) { l = prob.l; cache = new Cache(l,(long int)(param.cache_size*(1<<20))); QD = new double[2*l]; sign = new schar[2*l]; index = new int[2*l]; for(int k=0;k*kernel_function)(k,k); QD[k+l] = QD[k]; } buffer[0] = new Qfloat[2*l]; buffer[1] = new Qfloat[2*l]; next_buffer = 0; } void swap_index(int i, int j) const { swap(sign[i],sign[j]); swap(index[i],index[j]); swap(QD[i],QD[j]); } Qfloat *get_Q(int i, int len) const { Qfloat *data; int j, real_i = index[i]; if(cache->get_data(real_i,&data,l) < l) { for(j=0;j*kernel_function)(real_i,j); } // reorder and copy Qfloat *buf = buffer[next_buffer]; next_buffer = 1 - next_buffer; schar si = sign[i]; for(j=0;jl; double *minus_ones = new double[l]; schar *y = new schar[l]; int i; for(i=0;iy[i] > 0) y[i] = +1; else y[i] = -1; } Solver s; s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y, alpha, Cp, Cn, param->eps, si, param->shrinking); double sum_alpha=0; for(i=0;il)); for(i=0;il; double nu = param->nu; schar *y = new schar[l]; for(i=0;iy[i]>0) y[i] = +1; else y[i] = -1; double sum_pos = nu*l/2; double sum_neg = nu*l/2; for(i=0;ieps, si, param->shrinking); double r = si->r; info("C = %f\n",1/r); for(i=0;irho /= r; si->obj /= (r*r); si->upper_bound_p = 1/r; si->upper_bound_n = 1/r; delete[] y; delete[] zeros; } static void solve_one_class( const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo* si) { int l = prob->l; double *zeros = new double[l]; schar *ones = new schar[l]; int i; int n = (int)(param->nu*prob->l); // # of alpha's at upper bound for(i=0;il) alpha[n] = param->nu * prob->l - n; for(i=n+1;ieps, si, param->shrinking); delete[] zeros; delete[] ones; } static void 
solve_epsilon_svr( const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo* si) { int l = prob->l; double *alpha2 = new double[2*l]; double *linear_term = new double[2*l]; schar *y = new schar[2*l]; int i; for(i=0;ip - prob->y[i]; y[i] = 1; alpha2[i+l] = 0; linear_term[i+l] = param->p + prob->y[i]; y[i+l] = -1; } Solver s; s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, alpha2, param->C, param->C, param->eps, si, param->shrinking); double sum_alpha = 0; for(i=0;iC*l)); delete[] alpha2; delete[] linear_term; delete[] y; } static void solve_nu_svr( const svm_problem *prob, const svm_parameter *param, double *alpha, Solver::SolutionInfo* si) { int l = prob->l; double C = param->C; double *alpha2 = new double[2*l]; double *linear_term = new double[2*l]; schar *y = new schar[2*l]; int i; double sum = C * param->nu * l / 2; for(i=0;iy[i]; y[i] = 1; linear_term[i+l] = prob->y[i]; y[i+l] = -1; } Solver_NU s; s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, alpha2, C, C, param->eps, si, param->shrinking); info("epsilon = %f\n",-si->r); for(i=0;il); Solver::SolutionInfo si; switch(param->svm_type) { case C_SVC: solve_c_svc(prob,param,alpha,&si,Cp,Cn); break; case NU_SVC: solve_nu_svc(prob,param,alpha,&si); break; case ONE_CLASS: solve_one_class(prob,param,alpha,&si); break; case EPSILON_SVR: solve_epsilon_svr(prob,param,alpha,&si); break; case NU_SVR: solve_nu_svr(prob,param,alpha,&si); break; } info("obj = %f, rho = %f\n",si.obj,si.rho); // output SVs int nSV = 0; int nBSV = 0; for(int i=0;il;i++) { if(fabs(alpha[i]) > 0) { ++nSV; if(prob->y[i] > 0) { if(fabs(alpha[i]) >= si.upper_bound_p) ++nBSV; } else { if(fabs(alpha[i]) >= si.upper_bound_n) ++nBSV; } } } info("nSV = %d, nBSV = %d\n",nSV,nBSV); decision_function f; f.alpha = alpha; f.rho = si.rho; return f; } // Platt's binary SVM Probablistic Output: an improvement from Lin et al. 
static void sigmoid_train( int l, const double *dec_values, const double *labels, double& A, double& B) { double prior1=0, prior0 = 0; int i; for (i=0;i 0) prior1+=1; else prior0+=1; int max_iter=100; // Maximal number of iterations double min_step=1e-10; // Minimal step taken in line search double sigma=1e-12; // For numerically strict PD of Hessian double eps=1e-5; double hiTarget=(prior1+1.0)/(prior1+2.0); double loTarget=1/(prior0+2.0); double *t=Malloc(double,l); double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize; double newA,newB,newf,d1,d2; int iter; // Initial Point and Initial Fun Value A=0.0; B=log((prior0+1.0)/(prior1+1.0)); double fval = 0.0; for (i=0;i0) t[i]=hiTarget; else t[i]=loTarget; fApB = dec_values[i]*A+B; if (fApB>=0) fval += t[i]*fApB + log(1+exp(-fApB)); else fval += (t[i] - 1)*fApB +log(1+exp(fApB)); } for (iter=0;iter= 0) { p=exp(-fApB)/(1.0+exp(-fApB)); q=1.0/(1.0+exp(-fApB)); } else { p=1.0/(1.0+exp(fApB)); q=exp(fApB)/(1.0+exp(fApB)); } d2=p*q; h11+=dec_values[i]*dec_values[i]*d2; h22+=d2; h21+=dec_values[i]*d2; d1=t[i]-p; g1+=dec_values[i]*d1; g2+=d1; } // Stopping Criteria if (fabs(g1)= min_step) { newA = A + stepsize * dA; newB = B + stepsize * dB; // New function value newf = 0.0; for (i=0;i= 0) newf += t[i]*fApB + log(1+exp(-fApB)); else newf += (t[i] - 1)*fApB +log(1+exp(fApB)); } // Check sufficient decrease if (newf=max_iter) info("Reaching maximal iterations in two-class probability estimates\n"); free(t); } static double sigmoid_predict(double decision_value, double A, double B) { double fApB = decision_value*A+B; // 1-p used later; avoid catastrophic cancellation if (fApB >= 0) return exp(-fApB)/(1.0+exp(-fApB)); else return 1.0/(1+exp(fApB)) ; } // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng static void multiclass_probability(int k, double **r, double *p) { int t,j; int iter = 0, max_iter=max(100,k); double **Q=Malloc(double *,k); double *Qp=Malloc(double,k); double pQp, eps=0.005/k; for 
(t=0;tmax_error) max_error=error; } if (max_error=max_iter) info("Exceeds max_iter in multiclass_prob\n"); for(t=0;tl); double *dec_values = Malloc(double,prob->l); // random shuffle for(i=0;il;i++) perm[i]=i; for(i=0;il;i++) { int j = i+rand()%(prob->l-i); swap(perm[i],perm[j]); } for(i=0;il/nr_fold; int end = (i+1)*prob->l/nr_fold; int j,k; struct svm_problem subprob; subprob.l = prob->l-(end-begin); subprob.x = Malloc(struct svm_node*,subprob.l); subprob.y = Malloc(double,subprob.l); k=0; for(j=0;jx[perm[j]]; subprob.y[k] = prob->y[perm[j]]; ++k; } for(j=end;jl;j++) { subprob.x[k] = prob->x[perm[j]]; subprob.y[k] = prob->y[perm[j]]; ++k; } int p_count=0,n_count=0; for(j=0;j0) p_count++; else n_count++; if(p_count==0 && n_count==0) for(j=begin;j 0 && n_count == 0) for(j=begin;j 0) for(j=begin;jx[perm[j]],&(dec_values[perm[j]])); // ensure +1 -1 order; reason not using CV subroutine dec_values[perm[j]] *= submodel->label[0]; } svm_free_and_destroy_model(&submodel); svm_destroy_param(&subparam); } free(subprob.x); free(subprob.y); } sigmoid_train(prob->l,dec_values,prob->y,probA,probB); free(dec_values); free(perm); } // Return parameter of a Laplace distribution static double svm_svr_probability( const svm_problem *prob, const svm_parameter *param) { int i; int nr_fold = 5; double *ymv = Malloc(double,prob->l); double mae = 0; svm_parameter newparam = *param; newparam.probability = 0; svm_cross_validation(prob,&newparam,nr_fold,ymv); for(i=0;il;i++) { ymv[i]=prob->y[i]-ymv[i]; mae += fabs(ymv[i]); } mae /= prob->l; double std=sqrt(2*mae*mae); int count=0; mae=0; for(i=0;il;i++) if (fabs(ymv[i]) > 5*std) count=count+1; else mae+=fabs(ymv[i]); mae /= (prob->l-count); info("Prob. 
model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae); free(ymv); return mae; } // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data // perm, length l, must be allocated before calling this subroutine static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm) { int l = prob->l; int max_nr_class = 16; int nr_class = 0; int *label = Malloc(int,max_nr_class); int *count = Malloc(int,max_nr_class); int *data_label = Malloc(int,l); int i; for(i=0;iy[i]; int j; for(j=0;jparam = *param; model->free_sv = 0; // XXX if(param->svm_type == ONE_CLASS || param->svm_type == EPSILON_SVR || param->svm_type == NU_SVR) { // regression or one-class-svm model->nr_class = 2; model->label = NULL; model->nSV = NULL; model->probA = NULL; model->probB = NULL; model->sv_coef = Malloc(double *,1); if(param->probability && (param->svm_type == EPSILON_SVR || param->svm_type == NU_SVR)) { model->probA = Malloc(double,1); model->probA[0] = svm_svr_probability(prob,param); } decision_function f = svm_train_one(prob,param,0,0); model->rho = Malloc(double,1); model->rho[0] = f.rho; int nSV = 0; int i; for(i=0;il;i++) if(fabs(f.alpha[i]) > 0) ++nSV; model->l = nSV; model->SV = Malloc(svm_node *,nSV); model->sv_coef[0] = Malloc(double,nSV); int j = 0; for(i=0;il;i++) if(fabs(f.alpha[i]) > 0) { model->SV[j] = prob->x[i]; model->sv_coef[0][j] = f.alpha[i]; ++j; } free(f.alpha); } else { // classification int l = prob->l; int nr_class; int *label = NULL; int *start = NULL; int *count = NULL; int *perm = Malloc(int,l); // group training data of the same class svm_group_classes(prob,&nr_class,&label,&start,&count,perm); if(nr_class == 1) info("WARNING: training data in only one class. 
See README for details.\n"); svm_node **x = Malloc(svm_node *,l); int i; for(i=0;ix[perm[i]]; // calculate weighted C double *weighted_C = Malloc(double, nr_class); for(i=0;iC; for(i=0;inr_weight;i++) { int j; for(j=0;jweight_label[i] == label[j]) break; if(j == nr_class) fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]); else weighted_C[j] *= param->weight[i]; } // train k*(k-1)/2 models bool *nonzero = Malloc(bool,l); for(i=0;iprobability) { probA=Malloc(double,nr_class*(nr_class-1)/2); probB=Malloc(double,nr_class*(nr_class-1)/2); } int p = 0; for(i=0;iprobability) svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]); f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]); for(k=0;k 0) nonzero[si+k] = true; for(k=0;k 0) nonzero[sj+k] = true; free(sub_prob.x); free(sub_prob.y); ++p; } // build output model->nr_class = nr_class; model->label = Malloc(int,nr_class); for(i=0;ilabel[i] = label[i]; model->rho = Malloc(double,nr_class*(nr_class-1)/2); for(i=0;irho[i] = f[i].rho; if(param->probability) { model->probA = Malloc(double,nr_class*(nr_class-1)/2); model->probB = Malloc(double,nr_class*(nr_class-1)/2); for(i=0;iprobA[i] = probA[i]; model->probB[i] = probB[i]; } } else { model->probA=NULL; model->probB=NULL; } int total_sv = 0; int *nz_count = Malloc(int,nr_class); model->nSV = Malloc(int,nr_class); for(i=0;inSV[i] = nSV; nz_count[i] = nSV; } info("Total nSV = %d\n",total_sv); model->l = total_sv; model->SV = Malloc(svm_node *,total_sv); p = 0; for(i=0;iSV[p++] = x[i]; int *nz_start = Malloc(int,nr_class); nz_start[0] = 0; for(i=1;isv_coef = Malloc(double *,nr_class-1); for(i=0;isv_coef[i] = Malloc(double,total_sv); p = 0; for(i=0;isv_coef[j-1][q++] = f[p].alpha[k]; q = nz_start[j]; for(k=0;ksv_coef[i][q++] = f[p].alpha[ci+k]; ++p; } free(label); free(probA); free(probB); free(count); free(perm); free(start); free(x); free(weighted_C); free(nonzero); for(i=0;il; 
int *perm = Malloc(int,l); int nr_class; // stratified cv may not give leave-one-out rate // Each class to l folds -> some folds may have zero elements if((param->svm_type == C_SVC || param->svm_type == NU_SVC) && nr_fold < l) { int *start = NULL; int *label = NULL; int *count = NULL; svm_group_classes(prob,&nr_class,&label,&start,&count,perm); // random shuffle and then data grouped by fold using the array perm int *fold_count = Malloc(int,nr_fold); int c; int *index = Malloc(int,l); for(i=0;ix[perm[j]]; subprob.y[k] = prob->y[perm[j]]; ++k; } for(j=end;jx[perm[j]]; subprob.y[k] = prob->y[perm[j]]; ++k; } struct svm_model *submodel = svm_train(&subprob,param); if(param->probability && (param->svm_type == C_SVC || param->svm_type == NU_SVC)) { double *prob_estimates=Malloc(double,svm_get_nr_class(submodel)); for(j=begin;jx[perm[j]],prob_estimates); free(prob_estimates); } else for(j=begin;jx[perm[j]]); svm_free_and_destroy_model(&submodel); free(subprob.x); free(subprob.y); } free(fold_start); free(perm); } int svm_get_svm_type(const svm_model *model) { return model->param.svm_type; } int svm_get_nr_class(const svm_model *model) { return model->nr_class; } void svm_get_labels(const svm_model *model, int* label) { if (model->label != NULL) for(int i=0;inr_class;i++) label[i] = model->label[i]; } double svm_get_svr_probability(const svm_model *model) { if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) && model->probA!=NULL) return model->probA[0]; else { fprintf(stderr,"Model doesn't contain information for SVR probability inference\n"); return 0; } } double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values) { int i; if(model->param.svm_type == ONE_CLASS || model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) { double *sv_coef = model->sv_coef[0]; double sum = 0; for(i=0;il;i++) sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param); sum -= model->rho[0]; *dec_values = sum; 
if(model->param.svm_type == ONE_CLASS) return (sum>0)?1:-1; else return sum; } else { int nr_class = model->nr_class; int l = model->l; double *kvalue = Malloc(double,l); for(i=0;iSV[i],model->param); int *start = Malloc(int,nr_class); start[0] = 0; for(i=1;inSV[i-1]; int *vote = Malloc(int,nr_class); for(i=0;inSV[i]; int cj = model->nSV[j]; int k; double *coef1 = model->sv_coef[j-1]; double *coef2 = model->sv_coef[i]; for(k=0;krho[p]; dec_values[p] = sum; if(dec_values[p] > 0) ++vote[i]; else ++vote[j]; p++; } int vote_max_idx = 0; for(i=1;i vote[vote_max_idx]) vote_max_idx = i; free(kvalue); free(start); free(vote); return model->label[vote_max_idx]; } } double svm_predict(const svm_model *model, const svm_node *x) { int nr_class = model->nr_class; double *dec_values; if(model->param.svm_type == ONE_CLASS || model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) dec_values = Malloc(double, 1); else dec_values = Malloc(double, nr_class*(nr_class-1)/2); double pred_result = svm_predict_values(model, x, dec_values); free(dec_values); return pred_result; } double svm_predict_probability( const svm_model *model, const svm_node *x, double *prob_estimates) { if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) && model->probA!=NULL && model->probB!=NULL) { int i; int nr_class = model->nr_class; double *dec_values = Malloc(double, nr_class*(nr_class-1)/2); svm_predict_values(model, x, dec_values); double min_prob=1e-7; double **pairwise_prob=Malloc(double *,nr_class); for(i=0;iprobA[k],model->probB[k]),min_prob),1-min_prob); pairwise_prob[j][i]=1-pairwise_prob[i][j]; k++; } multiclass_probability(nr_class,pairwise_prob,prob_estimates); int prob_max_idx = 0; for(i=1;i prob_estimates[prob_max_idx]) prob_max_idx = i; for(i=0;ilabel[prob_max_idx]; } else return svm_predict(model, x); } static const char *svm_type_table[] = { "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL }; static const char *kernel_type_table[]= { 
"linear","polynomial","rbf","sigmoid","precomputed",NULL }; int svm_save_model(const char *model_file_name, const svm_model *model) { FILE *fp = fopen(model_file_name,"w"); if(fp==NULL) return -1; char *old_locale = strdup(setlocale(LC_ALL, NULL)); setlocale(LC_ALL, "C"); const svm_parameter& param = model->param; fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]); fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]); if(param.kernel_type == POLY) fprintf(fp,"degree %d\n", param.degree); if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID) fprintf(fp,"gamma %g\n", param.gamma); if(param.kernel_type == POLY || param.kernel_type == SIGMOID) fprintf(fp,"coef0 %g\n", param.coef0); int nr_class = model->nr_class; int l = model->l; fprintf(fp, "nr_class %d\n", nr_class); fprintf(fp, "total_sv %d\n",l); { fprintf(fp, "rho"); for(int i=0;irho[i]); fprintf(fp, "\n"); } if(model->label) { fprintf(fp, "label"); for(int i=0;ilabel[i]); fprintf(fp, "\n"); } if(model->probA) // regression has probA only { fprintf(fp, "probA"); for(int i=0;iprobA[i]); fprintf(fp, "\n"); } if(model->probB) { fprintf(fp, "probB"); for(int i=0;iprobB[i]); fprintf(fp, "\n"); } if(model->nSV) { fprintf(fp, "nr_sv"); for(int i=0;inSV[i]); fprintf(fp, "\n"); } fprintf(fp, "SV\n"); const double * const *sv_coef = model->sv_coef; const svm_node * const *SV = model->SV; for(int i=0;ivalue)); else while(p->index != -1) { fprintf(fp,"%d:%.8g ",p->index,p->value); p++; } fprintf(fp, "\n"); } setlocale(LC_ALL, old_locale); free(old_locale); if (ferror(fp) != 0 || fclose(fp) != 0) return -1; else return 0; } static char *line = NULL; static int max_line_len; static char* readline(FILE *input) { int len; if(fgets(line,max_line_len,input) == NULL) return NULL; while(strrchr(line,'\n') == NULL) { max_line_len *= 2; line = (char *) realloc(line,max_line_len); len = (int) strlen(line); if(fgets(line+len,max_line_len-len,input) == NULL) break; } return 
line; } svm_model *svm_load_model(const char *model_file_name) { FILE *fp = fopen(model_file_name,"rb"); if(fp==NULL) return NULL; char *old_locale = strdup(setlocale(LC_ALL, NULL)); setlocale(LC_ALL, "C"); // read parameters svm_model *model = Malloc(svm_model,1); svm_parameter& param = model->param; model->rho = NULL; model->probA = NULL; model->probB = NULL; model->label = NULL; model->nSV = NULL; char cmd[81]; while(1) { fscanf(fp,"%80s",cmd); if(strcmp(cmd,"svm_type")==0) { fscanf(fp,"%80s",cmd); int i; for(i=0;svm_type_table[i];i++) { if(strcmp(svm_type_table[i],cmd)==0) { param.svm_type=i; break; } } if(svm_type_table[i] == NULL) { fprintf(stderr,"unknown svm type.\n"); setlocale(LC_ALL, old_locale); free(old_locale); free(model->rho); free(model->label); free(model->nSV); free(model); return NULL; } } else if(strcmp(cmd,"kernel_type")==0) { fscanf(fp,"%80s",cmd); int i; for(i=0;kernel_type_table[i];i++) { if(strcmp(kernel_type_table[i],cmd)==0) { param.kernel_type=i; break; } } if(kernel_type_table[i] == NULL) { fprintf(stderr,"unknown kernel function.\n"); setlocale(LC_ALL, old_locale); free(old_locale); free(model->rho); free(model->label); free(model->nSV); free(model); return NULL; } } else if(strcmp(cmd,"degree")==0) fscanf(fp,"%d",¶m.degree); else if(strcmp(cmd,"gamma")==0) fscanf(fp,"%lf",¶m.gamma); else if(strcmp(cmd,"coef0")==0) fscanf(fp,"%lf",¶m.coef0); else if(strcmp(cmd,"nr_class")==0) fscanf(fp,"%d",&model->nr_class); else if(strcmp(cmd,"total_sv")==0) fscanf(fp,"%d",&model->l); else if(strcmp(cmd,"rho")==0) { int n = model->nr_class * (model->nr_class-1)/2; model->rho = Malloc(double,n); for(int i=0;irho[i]); } else if(strcmp(cmd,"label")==0) { int n = model->nr_class; model->label = Malloc(int,n); for(int i=0;ilabel[i]); } else if(strcmp(cmd,"probA")==0) { int n = model->nr_class * (model->nr_class-1)/2; model->probA = Malloc(double,n); for(int i=0;iprobA[i]); } else if(strcmp(cmd,"probB")==0) { int n = model->nr_class * 
(model->nr_class-1)/2; model->probB = Malloc(double,n); for(int i=0;iprobB[i]); } else if(strcmp(cmd,"nr_sv")==0) { int n = model->nr_class; model->nSV = Malloc(int,n); for(int i=0;inSV[i]); } else if(strcmp(cmd,"SV")==0) { while(1) { int c = getc(fp); if(c==EOF || c=='\n') break; } break; } else { fprintf(stderr,"unknown text in model file: [%s]\n",cmd); setlocale(LC_ALL, old_locale); free(old_locale); free(model->rho); free(model->label); free(model->nSV); free(model); return NULL; } } // read sv_coef and SV int elements = 0; long pos = ftell(fp); max_line_len = 1024; line = Malloc(char,max_line_len); char *p,*endptr,*idx,*val; while(readline(fp)!=NULL) { p = strtok(line,":"); while(1) { p = strtok(NULL,":"); if(p == NULL) break; ++elements; } } elements += model->l; fseek(fp,pos,SEEK_SET); int m = model->nr_class - 1; int l = model->l; model->sv_coef = Malloc(double *,m); int i; for(i=0;isv_coef[i] = Malloc(double,l); model->SV = Malloc(svm_node*,l); svm_node *x_space = NULL; if(l>0) x_space = Malloc(svm_node,elements); int j=0; for(i=0;iSV[i] = &x_space[j]; p = strtok(line, " \t"); model->sv_coef[0][i] = strtod(p,&endptr); for(int k=1;ksv_coef[k][i] = strtod(p,&endptr); } while(1) { idx = strtok(NULL, ":"); val = strtok(NULL, " \t"); if(val == NULL) break; x_space[j].index = (int) strtol(idx,&endptr,10); x_space[j].value = strtod(val,&endptr); ++j; } x_space[j++].index = -1; } free(line); setlocale(LC_ALL, old_locale); free(old_locale); if (ferror(fp) != 0 || fclose(fp) != 0) return NULL; model->free_sv = 1; // XXX return model; } void svm_free_model_content(svm_model* model_ptr) { if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL) free((void *)(model_ptr->SV[0])); if(model_ptr->sv_coef) { for(int i=0;inr_class-1;i++) free(model_ptr->sv_coef[i]); } free(model_ptr->SV); model_ptr->SV = NULL; free(model_ptr->sv_coef); model_ptr->sv_coef = NULL; free(model_ptr->rho); model_ptr->rho = NULL; free(model_ptr->label); model_ptr->label= NULL; 
free(model_ptr->probA); model_ptr->probA = NULL; free(model_ptr->probB); model_ptr->probB= NULL; free(model_ptr->nSV); model_ptr->nSV = NULL; } void svm_free_and_destroy_model(svm_model** model_ptr_ptr) { if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL) { svm_free_model_content(*model_ptr_ptr); free(*model_ptr_ptr); *model_ptr_ptr = NULL; } } void svm_destroy_param(svm_parameter* param) { free(param->weight_label); free(param->weight); } const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param) { // svm_type int svm_type = param->svm_type; if(svm_type != C_SVC && svm_type != NU_SVC && svm_type != ONE_CLASS && svm_type != EPSILON_SVR && svm_type != NU_SVR) return "unknown svm type"; // kernel_type, degree int kernel_type = param->kernel_type; if(kernel_type != LINEAR && kernel_type != POLY && kernel_type != RBF && kernel_type != SIGMOID && kernel_type != PRECOMPUTED) return "unknown kernel type"; if(param->gamma < 0) return "gamma < 0"; if(param->degree < 0) return "degree of polynomial kernel < 0"; // cache_size,eps,C,nu,p,shrinking if(param->cache_size <= 0) return "cache_size <= 0"; if(param->eps <= 0) return "eps <= 0"; if(svm_type == C_SVC || svm_type == EPSILON_SVR || svm_type == NU_SVR) if(param->C <= 0) return "C <= 0"; if(svm_type == NU_SVC || svm_type == ONE_CLASS || svm_type == NU_SVR) if(param->nu <= 0 || param->nu > 1) return "nu <= 0 or nu > 1"; if(svm_type == EPSILON_SVR) if(param->p < 0) return "p < 0"; if(param->shrinking != 0 && param->shrinking != 1) return "shrinking != 0 and shrinking != 1"; if(param->probability != 0 && param->probability != 1) return "probability != 0 and probability != 1"; if(param->probability == 1 && svm_type == ONE_CLASS) return "one-class SVM probability output not supported yet"; // check whether nu-svc is feasible if(svm_type == NU_SVC) { int l = prob->l; int max_nr_class = 16; int nr_class = 0; int *label = Malloc(int,max_nr_class); int *count = Malloc(int,max_nr_class); int i; 
for(i=0;iy[i]; int j; for(j=0;jnu*(n1+n2)/2 > min(n1,n2)) { free(label); free(count); return "specified nu is infeasible"; } } } free(label); free(count); } return NULL; } int svm_check_probability_model(const svm_model *model) { return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) && model->probA!=NULL && model->probB!=NULL) || ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) && model->probA!=NULL); } void svm_set_print_string_function(void (*print_func)(const char *)) { if(print_func == NULL) svm_print_string = &print_string_stdout; else svm_print_string = print_func; } NaN/src/str2array.cpp0000664002356700235670000002250312323251537015173 0ustar schloeglschloegl//------------------------------------------------------------------- // C-MEX implementation of STR2ARRAY - this function is part of the NaN-toolbox. // Actually, it also fixes a problem in STR2ARRAY.m described here: // http://www-old.cae.wisc.edu/pipermail/help-octave/2007-December/007325.html // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, see . // // // usage: // [...] = STR2ARRAY(s) // [...] = STR2ARRAY(sa) // [...] = STR2ARRAY(s,cdelim) // [...] = STR2ARRAY(s,cdelim,rdelim) // [...] = STR2ARRAY(s,cdelim,rdelim,ddelim) // [num,status,strarray] = STR2ARRAY(...) 
// // Input: // s char string // sa cell array of strings // cdelim column delimiter // rdelim row delimiter // ddelim decimal delimiter // // Output: // $Id: STR2ARRAY.cpp 7142 2010-03-30 18:48:06Z schloegl $ // Copyright (C) 2010,2011 Alois Schloegl // This function is part of the NaN-toolbox // http://pub.ist.ac.at/~schloegl/matlab/NaN/ // //------------------------------------------------------------------- #include #include #include #include #include "mex.h" #ifdef tmwtypes_h #if (MX_API_VER<=0x07020000) typedef int mwSize; #endif #endif int str2val(char *s, double *r, double *i) { /* str2val converts string into numeric value. real and complex numbers are supported. complex numbers are "3.4 + 5.6i" or "3.4 + i * 5.6" (spaces are optional) input: s char string output: *r real value *i imaginary value return values: 0: conversion failed 1: real number returned: 2: complex number returned */ char *endptr = NULL; double val = strtod(s, &endptr); // conversion while (isspace(*endptr)) endptr++; #ifdef DEBUG mexPrintf("123<%s>\t,%f,\t[%s]\n",s,val,endptr); #endif if (!*endptr) { // conversion successful *r = val; return(1); } else if ((*endptr=='+') || (*endptr=='-')) { // imaginary part double sgn = (*endptr=='+') ? 
1.0 : -1.0; double ival; while (isspace(*(++endptr))); if (*endptr=='i') { // case " a + i * b " while (isspace(*(++endptr))); if (*endptr=='*') { ival = strtod(endptr+1, &endptr); // conversion if (*endptr && !isspace(*endptr)) { return(0); // failed } else { *r = val; *i = sgn*ival; return(2); // } } else return(0); //failed } else { // case " a + bi " ival = strtod(endptr, &endptr); // conversion if (*endptr != 'i') return(0); endptr++; while (*endptr) { if (!isspace(*endptr)) return(0); endptr++; } *r = val; *i = sgn*ival; return(2); } } else if (*endptr && !isspace(*endptr)) { // conversion failed return(0); } } void mexFunction( int nlhs, /* number of expected outputs */ mxArray *plhs[], /* array of pointers to output arguments */ int nrhs, /* number of inputs */ const mxArray *prhs[] /* array of pointers to input arguments */ ) { char *s = NULL; const char *cdelim = "\x09,"; const char *rdelim = "\x0a;"; const char *ddelim = NULL; const char *valid_delim = " ()[]{},;:\"|/\x21\x22\x09\0x0a\0x0b\0x0c\0x0d\x00"; // valid delimiter uint8_t *u; size_t slen = 0,k; size_t maxcol=0, maxrow=0, nr, nc; if (nrhs<1) { mexPrintf(" STR2ARRAY.MEX converts delimiter text files into arrays of numerics and cell-strings\n"); mexPrintf(" STR2ARRAY.MEX converts delimiter text files into numeric arrays\n"); mexPrintf(" It fixes a problem of the old STR2DOUBLE discussed here: http://www-old.cae.wisc.edu/pipermail/help-octave/2007-December/007325.html\n"); mexPrintf(" at avoids using the insecure STR2NUM using EVAL\n"); mexPrintf("\n Usage of STR2ARRAY:\n"); mexPrintf("\t[...] = STR2ARRAY(s)\n"); mexPrintf("\t[...] = STR2ARRAY(sa)\n"); mexPrintf("\t[...] = STR2ARRAY(s,cdelim)\n"); mexPrintf("\t[...] = STR2ARRAY(s,cdelim,rdelim)\n"); mexPrintf("\t[...] 
= STR2ARRAY(s,cdelim,rdelim,ddelim)\n"); mexPrintf("\t[num,status,strarray] = STR2ARRAY(...)\n"); mexPrintf(" Input:\n\ts\tstring\n\tsa\tcell array of strings\n\tcdelim\tlist of column delimiters (default: \",\"\n\trdelim\tlist of row delimiter (defautlt: \";\")"); mexPrintf("\n\tddelim\tdecimal delimiter (default: \".\"). This is useful if decimal delimiter is a comma (e.g. after Excel export in Europe)\n"); mexPrintf(" Output:\n\tnum\tnumeric array\n\tstatus\tflag failing conversion\n\tstrarray\tcell array of strings contains strings of failed conversions\n"); mexPrintf("\nExamples:\n\tSTR2ARRAY('4.12')\n\tSTR2ARRAY('1.2 - 3.4e2i') complex numbers\n\tSTR2ARRAY('101.01 , 0-i4; 1.2 - i * 3.4, abc')\n\tSTR2ARRAY({'101.01', '0-i4'; '1.2 - i * 3.4', 'abc'})\n\tSTR2ARRAY('1,2;a,b,c;3,4')\n"); mexPrintf("\tSTR2ARRAY('1;2,3;4',';',',') exchange row- and column delimiter\n\tSTR2ARRAY('1,200 4;3,400 5',' ',';',',') replace decimal delimter\n"); return; } /* sanity check of input arguments */ if ((nrhs==1) && mxIsCell(prhs[0])) { // cell array of strings maxrow = mxGetM(prhs[0]); maxcol = mxGetN(prhs[0]); /* allocate output memory */ if (nlhs>2) plhs[2] = mxCreateCellMatrix(maxrow, maxcol); uint8_t *v = NULL; if (nlhs>1) { plhs[1] = mxCreateLogicalMatrix(maxrow, maxcol); v = (uint8_t*)mxGetData(plhs[1]); memset(v, 1, maxrow*maxcol); } plhs[0] = mxCreateDoubleMatrix(maxrow, maxcol, mxREAL); double *o = (double*)mxGetData(plhs[0]); double *oi= NULL; for (k=0; k2) mxSetCell(plhs[2], k, a); */ } else { int typ = str2val(s, o+k, &ival); if ((nlhs>2) && (typ==0)) mxSetCell(plhs[2], k, mxCreateString(s)); if ((nlhs>1) && (typ> 0)) v[k] = 0; if (typ==2) { if (mxGetPi(plhs[0])==NULL) { oi = (double*) mxCalloc(maxrow*maxcol, sizeof(double)); mxSetPi(plhs[0], oi); } oi[k] = ival; } } } // cell-array input is finished return; } if (nrhs>0) { if (mxIsChar(prhs[0])) { s = mxArrayToString(prhs[0]); slen = mxGetNumberOfElements(prhs[0]); } else mexErrMsgTxt("arg1 is not a char array"); } 
if (nrhs>1) { if (mxIsChar(prhs[1])) cdelim = mxArrayToString(prhs[1]); else mexErrMsgTxt("arg2 is not a char array"); } if (nrhs>2) { if (mxIsChar(prhs[2])) rdelim = mxArrayToString(prhs[2]); else mexErrMsgTxt("arg3 is not a char array"); } if (nrhs>3) { if (mxIsChar(prhs[3]) && (mxGetNumberOfElements(prhs[3])==1) ) { ddelim = mxArrayToString(prhs[3]); for (k=0; k0); u[slen] = 2; } for (k = 0; k < slen; ) { if (u[k]==2) { s[k] = 0; nr++; if (nc > maxcol) maxcol=nc; nc = 0; } else if (u[k]==1) { s[k] = 0; nc++; } k++; } if (nc > maxcol) maxcol=nc; maxcol += (slen>0); maxrow = nr; /* allocate output memory */ if (nlhs>2) plhs[2] = mxCreateCellMatrix(maxrow, maxcol); uint8_t *v = NULL; if (nlhs>1) { plhs[1] = mxCreateLogicalMatrix(maxrow, maxcol); v = (uint8_t*)mxGetData(plhs[1]); memset(v,1,maxrow*maxcol); } plhs[0] = mxCreateDoubleMatrix(maxrow, maxcol, mxREAL); double *o = (double*)mxGetData(plhs[0]); double *oi = NULL; for (k=0; k2) && (typ==0)) mxSetCell(plhs[2], idx, mxCreateString(s+last)); if ((nlhs>1) && (typ> 0)) v[idx] = 0; if (typ==2) { if (oi==NULL) { oi = (double*) mxCalloc(maxrow*maxcol, sizeof(double)); mxSetPi(plhs[0], oi); } oi[idx] = ival; } } nc++; // next element if (u[k]==2) { nr++; // next row nc = 0; } last = k+1; } } mxFree(u); }; NaN/src/svm_model_matlab.c0000664002356700235670000002421012512506253016202 0ustar schloeglschloegl/* $Id: svm_model_matlab.c 12776 2015-04-12 15:18:03Z schloegl $ Copyright (c) 2000-2009 Chih-Chung Chang and Chih-Jen Lin Copyright (c) 2010 Alois Schloegl This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ This code was extracted from libsvm-mat-2.9-1 in Jan 2010 and modified for the use with Octave This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . Copyright (c) 2000-2009 Chih-Chung Chang and Chih-Jen Lin All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither name of copyright holders nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include "svm.h" #include "mex.h" #ifdef tmwtypes_h #if (MX_API_VER<=0x07020000) typedef int mwSize; typedef int mwIndex; #endif #endif #define NUM_OF_RETURN_FIELD 10 #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) static const char *field_names[] = { "Parameters", "nr_class", "totalSV", "rho", "Label", "ProbA", "ProbB", "nSV", "sv_coef", "SVs" }; #ifdef __cplusplus extern "C" { #endif const char *model_to_matlab_structure(mxArray *plhs[], int num_of_feature, struct svm_model *model) { int i, j, n; double *ptr; mxArray *return_model, **rhs; int out_id = 0; rhs = (mxArray **)mxMalloc(sizeof(mxArray *)*NUM_OF_RETURN_FIELD); /* Parameters */ rhs[out_id] = mxCreateDoubleMatrix(5, 1, mxREAL); ptr = mxGetPr(rhs[out_id]); ptr[0] = model->param.svm_type; ptr[1] = model->param.kernel_type; ptr[2] = model->param.degree; ptr[3] = model->param.gamma; ptr[4] = model->param.coef0; out_id++; /* nr_class */ rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); ptr = mxGetPr(rhs[out_id]); ptr[0] = model->nr_class; out_id++; /* total SV */ rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); ptr = mxGetPr(rhs[out_id]); ptr[0] = model->l; out_id++; /* rho */ n = model->nr_class*(model->nr_class-1)/2; rhs[out_id] = mxCreateDoubleMatrix(n, 1, mxREAL); ptr = mxGetPr(rhs[out_id]); for(i = 0; i < n; i++) ptr[i] = model->rho[i]; out_id++; /* Label */ if(model->label) { rhs[out_id] = mxCreateDoubleMatrix(model->nr_class, 1, mxREAL); ptr = mxGetPr(rhs[out_id]); for(i = 0; i < model->nr_class; i++) ptr[i] = model->label[i]; } else rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL); out_id++; /* probA */ if(model->probA != NULL) { rhs[out_id] = mxCreateDoubleMatrix(n, 1, mxREAL); ptr = mxGetPr(rhs[out_id]); for(i = 0; i < n; i++) ptr[i] = model->probA[i]; } else rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL); out_id ++; /* probB */ if(model->probB != NULL) { rhs[out_id] = mxCreateDoubleMatrix(n, 1, mxREAL); ptr = mxGetPr(rhs[out_id]); for(i = 0; i < n; i++) ptr[i] = 
model->probB[i]; } else rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL); out_id++; /* nSV */ if(model->nSV) { rhs[out_id] = mxCreateDoubleMatrix(model->nr_class, 1, mxREAL); ptr = mxGetPr(rhs[out_id]); for(i = 0; i < model->nr_class; i++) ptr[i] = model->nSV[i]; } else rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL); out_id++; /* sv_coef */ rhs[out_id] = mxCreateDoubleMatrix(model->l, model->nr_class-1, mxREAL); ptr = mxGetPr(rhs[out_id]); for(i = 0; i < model->nr_class-1; i++) for(j = 0; j < model->l; j++) ptr[(i*(model->l))+j] = model->sv_coef[i][j]; out_id++; /* SVs */ { int ir_index, nonzero_element; mwIndex *ir, *jc; mxArray *pprhs[1], *pplhs[1]; if(model->param.kernel_type == PRECOMPUTED) { nonzero_element = model->l; num_of_feature = 1; } else { nonzero_element = 0; for(i = 0; i < model->l; i++) { j = 0; while(model->SV[i][j].index != -1) { nonzero_element++; j++; } } } /* SV in column, easier accessing */ rhs[out_id] = mxCreateSparse(num_of_feature, model->l, nonzero_element, mxREAL); ir = mxGetIr(rhs[out_id]); jc = mxGetJc(rhs[out_id]); ptr = mxGetPr(rhs[out_id]); jc[0] = ir_index = 0; for(i = 0;i < model->l; i++) { if(model->param.kernel_type == PRECOMPUTED) { /* make a (1 x model->l) matrix */ ir[ir_index] = 0; ptr[ir_index] = model->SV[i][0].value; ir_index++; jc[i+1] = jc[i] + 1; } else { int x_index = 0; while (model->SV[i][x_index].index != -1) { ir[ir_index] = model->SV[i][x_index].index - 1; ptr[ir_index] = model->SV[i][x_index].value; ir_index++, x_index++; } jc[i+1] = jc[i] + x_index; } } /* transpose back to SV in row */ pprhs[0] = rhs[out_id]; if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose")) return "cannot transpose SV matrix"; rhs[out_id] = pplhs[0]; out_id++; } /* Create a struct matrix contains NUM_OF_RETURN_FIELD fields */ return_model = mxCreateStructMatrix(1, 1, NUM_OF_RETURN_FIELD, field_names); /* Fill struct matrix with input arguments */ for(i = 0; i < NUM_OF_RETURN_FIELD; i++) 
mxSetField(return_model,0,field_names[i],mxDuplicateArray(rhs[i])); /* return */ plhs[0] = return_model; mxFree(rhs); return NULL; } struct svm_model *matlab_matrix_to_model(const mxArray *matlab_struct, const char **msg) { int i, j, n, num_of_fields; double *ptr; int id = 0; struct svm_node *x_space; struct svm_model *model; mxArray **rhs; num_of_fields = mxGetNumberOfFields(matlab_struct); if(num_of_fields != NUM_OF_RETURN_FIELD) { *msg = "number of return field is not correct"; return NULL; } rhs = (mxArray **) mxMalloc(sizeof(mxArray *)*num_of_fields); for(i=0;irho = NULL; model->probA = NULL; model->probB = NULL; model->label = NULL; model->nSV = NULL; model->free_sv = 1; /* XXX */ ptr = mxGetPr(rhs[id]); model->param.svm_type = (int)ptr[0]; model->param.kernel_type = (int)ptr[1]; model->param.degree = (int)ptr[2]; model->param.gamma = ptr[3]; model->param.coef0 = ptr[4]; id++; ptr = mxGetPr(rhs[id]); model->nr_class = (int)ptr[0]; id++; ptr = mxGetPr(rhs[id]); model->l = (int)ptr[0]; id++; /* rho */ n = model->nr_class * (model->nr_class-1)/2; model->rho = (double*) malloc(n*sizeof(double)); ptr = mxGetPr(rhs[id]); for(i=0;irho[i] = ptr[i]; id++; /* label */ if(mxIsEmpty(rhs[id]) == 0) { model->label = (int*) malloc(model->nr_class*sizeof(int)); ptr = mxGetPr(rhs[id]); for(i=0;inr_class;i++) model->label[i] = (int)ptr[i]; } id++; /* probA */ if(mxIsEmpty(rhs[id]) == 0) { model->probA = (double*) malloc(n*sizeof(double)); ptr = mxGetPr(rhs[id]); for(i=0;iprobA[i] = ptr[i]; } id++; /* probB */ if(mxIsEmpty(rhs[id]) == 0) { model->probB = (double*) malloc(n*sizeof(double)); ptr = mxGetPr(rhs[id]); for(i=0;iprobB[i] = ptr[i]; } id++; /* nSV */ if(mxIsEmpty(rhs[id]) == 0) { model->nSV = (int*) malloc(model->nr_class*sizeof(int)); ptr = mxGetPr(rhs[id]); for(i=0;inr_class;i++) model->nSV[i] = (int)ptr[i]; } id++; /* sv_coef */ ptr = mxGetPr(rhs[id]); model->sv_coef = (double**) malloc((model->nr_class-1)*sizeof(double)); for( i=0 ; i< model->nr_class -1 ; i++ ) 
model->sv_coef[i] = (double*) malloc((model->l)*sizeof(double)); for(i = 0; i < model->nr_class - 1; i++) for(j = 0; j < model->l; j++) model->sv_coef[i][j] = ptr[i*(model->l)+j]; id++; /* SV */ { int sr, sc, elements; int num_samples; mwIndex *ir, *jc; mxArray *pprhs[1], *pplhs[1]; /* transpose SV */ pprhs[0] = rhs[id]; if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose")) { svm_free_and_destroy_model(&model); *msg = "cannot transpose SV matrix"; return NULL; } rhs[id] = pplhs[0]; sr = (int)mxGetN(rhs[id]); sc = (int)mxGetM(rhs[id]); ptr = mxGetPr(rhs[id]); ir = mxGetIr(rhs[id]); jc = mxGetJc(rhs[id]); num_samples = (int)mxGetNzmax(rhs[id]); elements = num_samples + sr; model->SV = (struct svm_node **) malloc(sr * sizeof(struct svm_node *)); x_space = (struct svm_node *)malloc(elements * sizeof(struct svm_node)); /* SV is in column */ for(i=0;iSV[i] = &x_space[low+i]; for(j=low;jSV[i][x_index].index = (int)ir[j] + 1; model->SV[i][x_index].value = ptr[j]; x_index++; } model->SV[i][x_index].index = -1; } id++; } mxFree(rhs); return model; } #ifdef __cplusplus } #endif NaN/src/train.c0000664002356700235670000002406612512552402014020 0ustar schloeglschloegl/* $Id: train.c 12779 2015-04-12 20:26:42Z schloegl $ Copyright (c) 2007-2009 The LIBLINEAR Project. Copyright (c) 2010 Alois Schloegl This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ This code was extracted from liblinear-1.51 in Jan 2010 and modified for the use with Octave This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, see . */ #include #include #include #include #include #include "linear.h" #include "mex.h" #include "linear_model_matlab.h" #ifdef tmwtypes_h #if (MX_API_VER<=0x07020000) typedef int mwSize; typedef int mwIndex; #endif #endif #define CMD_LEN 2048 #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) #define INF HUGE_VAL void print_null(const char *s){} void (*liblinear_default_print_string) (const char *); void exit_with_help() { mexPrintf( "Usage: model = train(weight_vector, training_label_vector, training_instance_matrix, 'liblinear_options', 'col');\n" "liblinear_options:\n" "-s type : set type of solver (default 1)\n" " 0 -- L2-regularized logistic regression\n" " 1 -- L2-regularized L2-loss support vector classification (dual)\n" " 2 -- L2-regularized L2-loss support vector classification (primal)\n" " 3 -- L2-regularized L1-loss support vector classification (dual)\n" " 4 -- multi-class support vector classification by Crammer and Singer\n" " 5 -- L1-regularized L2-loss support vector classification\n" " 6 -- L1-regularized logistic regression\n" "-c cost : set the parameter C (default 1)\n" "-e epsilon : set tolerance of termination criterion\n" " -s 0 and 2\n" " |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n" " where f is the primal function, (default 0.01)\n" " -s 1, 3, and 4\n" " Dual maximal violation <= eps; similar to libsvm (default 0.1)\n" " -s 5 and 6\n" " |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,\n" " where f is the primal function (default 0.01)\n" "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n" "-wi weight: weights adjust the parameter C of different classes (see README for details)\n" "-v n: n-fold cross validation mode\n" "-q : quiet mode (no outputs)\n" "col:\n" " if 'col' is setted, training_instance_matrix is parsed in column format, otherwise is in row format\n" ); } // liblinear 
arguments struct parameter param; // set by parse_command_line struct problem prob; // set by read_problem struct model *model_; struct feature_node *x_space; int cross_validation_flag; int col_format_flag; int nr_fold; double bias; double do_cross_validation() { int i; int total_correct = 0; int *target = Malloc(int,prob.l); double retval = 0.0; cross_validation(&prob,¶m,nr_fold,target); for(i=0;i 3) { mxGetString(prhs[3], cmd, mxGetN(prhs[3]) + 1); if((argv[argc] = strtok(cmd, " ")) != NULL) while((argv[++argc] = strtok(NULL, " ")) != NULL) ; } // parse options for(i=1;i=argc && argv[i-1][1] != 'q') // since option -q has no parameter return 1; switch(argv[i-1][1]) { case 's': param.solver_type = atoi(argv[i]); break; case 'c': param.C = atof(argv[i]); break; case 'e': param.eps = atof(argv[i]); break; case 'B': bias = atof(argv[i]); break; case 'v': cross_validation_flag = 1; nr_fold = atoi(argv[i]); if(nr_fold < 2) { mexPrintf("n-fold cross validation: n must >= 2\n"); return 1; } break; case 'w': ++param.nr_weight; param.weight_label = (int *) realloc(param.weight_label,sizeof(int)*param.nr_weight); param.weight = (double *) realloc(param.weight,sizeof(double)*param.nr_weight); param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]); param.weight[param.nr_weight-1] = atof(argv[i]); break; case 'q': liblinear_print_string = &print_null; i--; break; default: mexPrintf("unknown option\n"); return 1; } } if(param.eps == INF) { if(param.solver_type == L2R_LR || param.solver_type == L2R_L2LOSS_SVC) param.eps = 0.01; else if(param.solver_type == L2R_L2LOSS_SVC_DUAL || param.solver_type == L2R_L1LOSS_SVC_DUAL || param.solver_type == MCSVM_CS) param.eps = 0.1; else if(param.solver_type == L1R_L2LOSS_SVC || param.solver_type == L1R_LR) param.eps = 0.01; } return 0; } static void fake_answer(mxArray *plhs[]) { plhs[0] = mxCreateDoubleMatrix(0, 0, mxREAL); } int read_problem_sparse(const mxArray *weight_vec, const mxArray *label_vec, const mxArray *instance_mat) { int 
i, j, k, low, high; mwIndex *ir, *jc; int elements, max_index, num_samples, label_vector_row_num, weight_vector_row_num; double *samples, *labels, *weights; mxArray *instance_mat_col; // instance sparse matrix in column format prob.x = NULL; prob.y = NULL; prob.W = NULL; x_space = NULL; if(col_format_flag) instance_mat_col = (mxArray *)instance_mat; else { // transpose instance matrix mxArray *prhs[1], *plhs[1]; prhs[0] = mxDuplicateArray(instance_mat); if(mexCallMATLAB(1, plhs, 1, prhs, "transpose")) { mexPrintf("Error: cannot transpose training instance matrix\n"); return -1; } instance_mat_col = plhs[0]; mxDestroyArray(prhs[0]); } // the number of instance prob.l = (int) mxGetN(instance_mat_col); weight_vector_row_num = (int) mxGetM(weight_vec); label_vector_row_num = (int) mxGetM(label_vec); if(weight_vector_row_num == 0) ;//mexPrintf("Warning: treat each instance with weight 1.0\n"); else if(weight_vector_row_num!=prob.l) { mexPrintf("Length of weight vector does not match # of instances.\n"); return -1; } if(label_vector_row_num!=prob.l) { mexPrintf("Length of label vector does not match # of instances.\n"); return -1; } // each column is one instance weights = mxGetPr(weight_vec); labels = mxGetPr(label_vec); samples = mxGetPr(instance_mat_col); ir = mxGetIr(instance_mat_col); jc = mxGetJc(instance_mat_col); num_samples = (int) mxGetNzmax(instance_mat_col); elements = num_samples + prob.l*2; max_index = (int) mxGetM(instance_mat_col); prob.y = Malloc(int, prob.l); prob.W = Malloc(double,prob.l); prob.x = Malloc(struct feature_node*, prob.l); x_space = Malloc(struct feature_node, elements); prob.bias=bias; j = 0; for(i=0;i 0) prob.W[i] *= (double) weights[i]; low = (int) jc[i], high = (int) jc[i+1]; for(k=low;k=0) { x_space[j].index = max_index+1; x_space[j].value = prob.bias; j++; } x_space[j++].index = -1; } if(prob.bias>=0) prob.n = max_index+1; else prob.n = max_index; return 0; } // Interface function of matlab // now assume prhs[0]: label prhs[1]: 
features void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] ) { const char *error_msg; // fix random seed to have same results for each run // (for cross validation) srand(1); // Transform the input Matrix to libsvm format if(nrhs > 2 && nrhs < 6) { int err=0; if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1]) || !mxIsDouble(prhs[2])) { mexPrintf("Error: weight vector, label vector and instance matrix must be double\n"); fake_answer(plhs); return; } if(parse_command_line(nrhs, prhs, NULL)) { exit_with_help(); destroy_param(¶m); fake_answer(plhs); return; } if(mxIsSparse(prhs[2])) err = read_problem_sparse(prhs[0], prhs[1], prhs[2]); else { mexPrintf("Training_instance_matrix must be sparse\n"); destroy_param(¶m); fake_answer(plhs); return; } // train's original code error_msg = check_parameter(¶m); if(err || error_msg) { if (error_msg != NULL) mexPrintf("Error: %s\n", error_msg); destroy_param(¶m); free(prob.y); free(prob.x); free(x_space); fake_answer(plhs); return; } if(cross_validation_flag) { double *ptr; plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL); ptr = mxGetPr(plhs[0]); ptr[0] = do_cross_validation(); } else { const char *error_msg; model_ = train(&prob, ¶m); error_msg = model_to_matlab_structure(plhs, model_); if(error_msg) mexPrintf("Error: can't convert libsvm model to matrix structure: %s\n", error_msg); destroy_model(model_); } destroy_param(¶m); free(prob.y); free(prob.x); free(prob.W); free(x_space); } else { exit_with_help(); fake_answer(plhs); return; } } NaN/src/xptopen.cpp0000775002356700235670000007660112507564404014756 0ustar schloeglschloegl//------------------------------------------------------------------- // XPTOPEN is C-MEX implementation for reading various // statistical data formats including SAS/XPT, SPSS/PASW, // STATA and ARFF data formats. Basic support for writing // SAS/XPT is also supported. // Endian conversion is done automatically. 
// // usage: x = xptopen(filename) // usage: x = xptopen(filename,'r') // read filename and return variables in struct x // usage: xptopen(filename,'w',x) // save fields of struct x in filename // usage: x = xptopen(filename,'a',x) // append fields of struct x to filename // // References: // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, see . // // $Id: xptopen.cpp 12770 2015-04-03 19:24:52Z schloegl $ // Copyright (C) 2010,2011,2012,2013 Alois Schloegl // This function is part of the NaN-toolbox // http://pub.ist.ac.at/~schloegl/matlab/NaN/ // // References: // [1] TS-140 THE RECORD LAYOUT OF A DATA SET IN SAS TRANSPORT (XPORT) FORMAT // http://support.sas.com/techsup/technote/ts140.html // [2] IBM floating point format // http://en.wikipedia.org/wiki/IBM_Floating_Point_Architecture // [3] see http://old.nabble.com/Re%3A-IBM-integer-and-double-formats-p20428979.html // [4] STATA File Format // http://www.stata.com/help.cgi?dta // http://www.stata.com/help.cgi?dta_113 //------------------------------------------------------------------- /* SPSS file format // http://cvs.savannah.gnu.org/pspp/doc/data-file-format.texi?root=pspp&content-type=text%2Fplain */ #define TEST_CONVERSION 0 // 0: ieee754, 1: SAS converter (big endian bug), 2: experimental #define DEBUG 1 #include #include //#include #include #include #include #include #include #include "mex.h" #ifdef tmwtypes_h #if (MX_API_VER<=0x07020000) typedef int 
mwSize; typedef int mwIndex; #endif #endif #define NaN (0.0/0.0) #define fix(m) (m<0 ? ceil(m) : floor(m)) #define max(a,b) (((a) > (b)) ? (a) : (b)) #define min(a,b) (((a) < (b)) ? (a) : (b)) #if 0 #elif defined(__linux__) # include # include #elif defined(__CYGWIN__) # include # include #elif defined(__GLIBC__) // for Hurd # include # include #elif defined(__MINGW32__) /* use local version because MINGW does not provide byteswap.h */ # define __BIG_ENDIAN 4321 # define __LITTLE_ENDIAN 1234 # define __BYTE_ORDER __LITTLE_ENDIAN #elif defined(__NetBSD__) # include # define __BIG_ENDIAN _BIG_ENDIAN # define __LITTLE_ENDIAN _LITTLE_ENDIAN # define __BYTE_ORDER _BYTE_ORDER # define bswap_16(x) bswap16(x) # define bswap_32(x) bswap32(x) # define bswap_64(x) bswap64(x) #elif defined(_APPLE_) && defined(_MACH_) # include # define _BYTE_ORDER __DARWIN_BYTE_ORDER # define _LITTLE_ENDIAN __DARWIN_LITTLE_ENDIAN # define _BIG_ENDIAN __DARWIN_BIG_ENDIAN # define bswap_16(x) __bswap16(x) # define bswap_32(x) __bswap32(x) # define bswap_64(x) __bswap64(x) #elif defined(__APPLE__) # include # define __BIG_ENDIAN 4321 # define __LITTLE_ENDIAN 1234 #if (defined(__LITTLE_ENDIAN__) && (__LITTLE_ENDIAN__ == 1)) #define __BYTE_ORDER __LITTLE_ENDIAN #else #define __BYTE_ORDER __BIG_ENDIAN #endif # define bswap_16(x) CFSwapInt16(x) # define bswap_32(x) CFSwapInt32(x) # define bswap_64(x) CFSwapInt64(x) #elif (defined(BSD) && (BSD >= 199103)) && !defined(__GLIBC__) # include # define __BIG_ENDIAN _BIG_ENDIAN # define __LITTLE_ENDIAN _LITTLE_ENDIAN # define __BYTE_ORDER _BYTE_ORDER # define bswap_16(x) __bswap16(x) # define bswap_32(x) __bswap32(x) # define bswap_64(x) __bswap64(x) #elif defined(__GNUC__) /* use byteswap macros from the host system, hopefully optimized ones ;-) */ # include # include # define bswap_16(x) __bswap_16 (x) # define bswap_32(x) __bswap_32 (x) # define bswap_64(x) __bswap_64 (x) #elif defined(__sparc__) # define __BIG_ENDIAN 4321 # define __LITTLE_ENDIAN 1234 # 
define __BYTE_ORDER __BIG_ENDIAN #else # error Unknown platform #endif #if defined(__MINGW32__) || defined(__sparc__) # ifndef bswap_16 # define bswap_16(x) \ ((((x) & 0xff00) >> 8) | (((x) & 0x00ff) << 8)) # endif # ifndef bswap_32 # define bswap_32(x) \ ((((x) & 0xff000000) >> 24) \ | (((x) & 0x00ff0000) >> 8) \ | (((x) & 0x0000ff00) << 8) \ | (((x) & 0x000000ff) << 24)) # endif # ifndef bswap_64 # define bswap_64(x) \ ((((x) & 0xff00000000000000ull) >> 56) \ | (((x) & 0x00ff000000000000ull) >> 40) \ | (((x) & 0x0000ff0000000000ull) >> 24) \ | (((x) & 0x000000ff00000000ull) >> 8) \ | (((x) & 0x00000000ff000000ull) << 8) \ | (((x) & 0x0000000000ff0000ull) << 24) \ | (((x) & 0x000000000000ff00ull) << 40) \ | (((x) & 0x00000000000000ffull) << 56)) # endif #endif #if !defined(__BIG_ENDIAN) && !defined(__LITTLE_ENDIAN) #error ENDIANITY is not known #endif #if __BYTE_ORDER == __BIG_ENDIAN #define l_endian_u16(x) ((uint16_t)bswap_16((uint16_t)(x))) #define l_endian_u32(x) ((uint32_t)bswap_32((uint32_t)(x))) #define l_endian_u64(x) ((uint64_t)bswap_64((uint64_t)(x))) #define l_endian_i16(x) ((int16_t)bswap_16((int16_t)(x))) #define l_endian_i32(x) ((int32_t)bswap_32((int32_t)(x))) #define l_endian_i64(x) ((int64_t)bswap_64((int64_t)(x))) #define b_endian_u16(x) ((uint16_t)(x)) #define b_endian_u32(x) ((uint32_t)(x)) #define b_endian_u64(x) ((uint64_t)(x)) #define b_endian_i16(x) ((int16_t)(x)) #define b_endian_i32(x) ((int32_t)(x)) #define b_endian_i64(x) ((int64_t)(x)) #elif __BYTE_ORDER==__LITTLE_ENDIAN #define l_endian_u16(x) ((uint16_t)(x)) #define l_endian_u32(x) ((uint32_t)(x)) #define l_endian_u64(x) ((uint64_t)(x)) #define l_endian_i16(x) ((int16_t)(x)) #define l_endian_i32(x) ((int32_t)(x)) #define l_endian_i64(x) ((int64_t)(x)) #define b_endian_u16(x) ((uint16_t)bswap_16((uint16_t)(x))) #define b_endian_u32(x) ((uint32_t)bswap_32((uint32_t)(x))) #define b_endian_u64(x) ((uint64_t)bswap_64((uint64_t)(x))) #define b_endian_i16(x) ((int16_t)bswap_16((int16_t)(x))) 
#define b_endian_i32(x) ((int32_t)bswap_32((int32_t)(x))) #define b_endian_i64(x) ((int64_t)bswap_64((int64_t)(x))) #endif /* __BYTE_ORDER */ /* Including ZLIB enables reading gzipped files (they are decompressed on-the-fly) The output files can be zipped, too. */ #ifdef WITH_ZLIB #include #endif double xpt2d(uint64_t x); uint64_t d2xpt(double x); double tm_time2gdf_time(struct tm *t); void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) { const char L1[] = "HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000 "; const char L2[] = "SAS SAS SASLIB 6.06 bsd4.2 13APR89:10:20:06"; //const char L3[] = ""; const char L4[] = "HEADER RECORD*******MEMBER HEADER RECORD!!!!!!!000000000000000001600000000140 "; const char L5[] = "HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000 "; const char L6[] = "SAS ABC SASLIB 6.06 bsd4.2 13APR89:10:20:06"; //const char L7[] = ""; const char L8[] = "HEADER RECORD*******NAMESTR HEADER RECORD!!!!!!!000000000200000000000000000000 "; const char LO[] = "HEADER RECORD*******OBS HEADER RECORD!!!!!!!000000000000000000000000000000 "; const char DATEFORMAT[] = "%d%b%y:%H:%M:%S"; char *fn = NULL; char Mode[3] = "r"; size_t count = 0, HeadLen0=80*8, HeadLen2=0, sz2 = 0; uint32_t NS = 0; char H0[HeadLen0]; char *H2 = NULL; char SWAP = 0; #ifndef ZLIB_H FILE *fid; #else gzFile fid; #define fopen gzopen #define fread(a,b,c,d) (gzread(d,a,b*c)/b) #define fwrite(a,b,c,d) (gzwrite(d,a,b*c)/b) #define feof gzeof #define fseek gzseek #define fclose gzclose #define rewind(fid) (gzseek(fid,0,SEEK_SET)) #endif // check for proper number of input and output arguments if ( PInputCount > 0 && mxGetClassID(PInputs[0])==mxCHAR_CLASS) { size_t buflen = (mxGetM(PInputs[0]) * mxGetN(PInputs[0]) * sizeof(mxChar)) + 1; fn = (char*)malloc(buflen); mxGetString(PInputs[0], fn, buflen); } else { mexPrintf("XPTOPEN read of several file formats and writing of the SAS Transport 
Format (*.xpt)\n"); mexPrintf("\n\tX = xptopen(filename)\n"); mexPrintf("\tX = xptopen(filename,'r')\n"); mexPrintf("\t\tread filename and return variables in struct X\n"); #ifdef ZLIB_H mexPrintf("\tSupported are ARFF, SAS-XPT and STATA files with or w/o zlib/gzip compression.\n"); #else mexPrintf("\tSupported are ARFF, SAS-XPT and STATA files.\n"); #endif mexPrintf("\n\tX = xptopen(filename,'w',X)\n"); mexPrintf("\t\tsave fields of struct X in filename.\n\n"); mexPrintf("\tThe fields of X must be column vectors of equal length.\n"); mexPrintf("\tEach vector is either a numeric vector or a cell array of strings.\n"); mexPrintf("\nThe SAS-XPT format stores Date/Time as numeric value counting the number of days since 1960-01-01.\n\n"); return; } if (PInputCount > 1) if (mxGetClassID(PInputs[1])==mxCHAR_CLASS && mxGetNumberOfElements(PInputs[1])) { mxGetString(PInputs[1],Mode,3); Mode[2]=0; } fid = fopen(fn,Mode); if (fid < 0) { mexErrMsgTxt("Can not open file!\n"); } if (Mode[0]=='r' || Mode[0]=='a' ) { count += fread(H0,1,80*8,fid); enum FileFormat { noFile, unknown, ARFF, SASXPT, SPSS, SQLite, STATA }; enum FileFormat TYPE; /* type of file format */ uint8_t LittleEndian; /* 1 if file is LittleEndian data format and 0 for big endian data format*/ TYPE = unknown; if (!memcmp(H0,"$FL2@(#) SPSS DATA FILE",23) || !memcmp(H0,"$FL2@(#) PASW STATISTICS DATA FILE",27)) { /* SPSS file format */ uint32_t M=0; mexWarnMsgTxt("XPTOPEN: support of for SPSS file format is very experimental (do not use it for production use)\n"); TYPE = SPSS; switch (*(uint32_t*)(H0+64)) { case 0x00000002: case 0x00000003: LittleEndian = 1; SWAP = __BYTE_ORDER==__BIG_ENDIAN; NS = l_endian_u32(*(uint32_t*)(H0+68)); M = l_endian_u32(*(uint32_t*)(H0+80)); break; case 0x02000000: case 0x03000000: SWAP = __BYTE_ORDER==__LITTLE_ENDIAN; LittleEndian = 0; NS = b_endian_u32(*(uint32_t*)(H0+68)); M = b_endian_u32(*(uint32_t*)(H0+80)); break; default: TYPE = unknown; } NS = *(int32_t*)(H0+80); M = 
*(int32_t*)(H0+80); if (SWAP) { NS = bswap_32(NS); M = bswap_32(M); } HeadLen0 = 184; char *H2 = (char*)malloc(NS*32); size_t c2 = 0; /* Read Variable SPSS header */ int ns = 0; const char **ListOfVarNames = (const char**)malloc((NS+1) * sizeof(char*)); char *VarNames = (char*)malloc((NS+1) * sizeof(char) * 9); double *MISSINGS = (double*)malloc((NS+1) * sizeof(double)); for (uint32_t k=0; k=0x6e || H0[0]<=114) && (H0[1]==1 || H0[1]==2) && H0[2]==1 && H0[3]==0) { /* STATA File Format http://www.stata.com/help.cgi?dta http://www.stata.com/help.cgi?dta_113 Stata files written by R start with 0x6e */ uint32_t M=0; TYPE = STATA; // Header 119 bytes LittleEndian = H0[1]==2; if (LittleEndian) { NS = l_endian_u16(*(uint16_t*)(H0+4)); M = l_endian_u32(*(uint32_t*)(H0+6)); } else { NS = b_endian_u16(*(uint16_t*)(H0+4)); M = b_endian_u32(*(uint32_t*)(H0+6)); } // Descriptors int fmtlen = (H0[0]==113) ? 12 : 49; fseek(fid,109,SEEK_SET); size_t HeadLen2 = 2+NS*(1+33+2+fmtlen+33+81); char *H1 = (char*)malloc(HeadLen2); HeadLen2 = fread(H1,1,HeadLen2,fid); // expansion fields char typ; int32_t len; char flagSWAP = (((__BYTE_ORDER == __BIG_ENDIAN) && LittleEndian) || ((__BYTE_ORDER == __LITTLE_ENDIAN) && !LittleEndian)); do { fread(&typ,1,1,fid); fread(&len,4,1,fid); if (flagSWAP) bswap_32(len); fseek(fid,len,SEEK_CUR); } while (len); uint8_t *typlist = (uint8_t*)H1; /* char *varlist = H1+NS; char *srtlist; char *fmtlist = H1+NS*36+2; char *lbllist = H1+NS*(36+fmtlen)+2; */ mxArray **R = (mxArray**) mxMalloc(NS*sizeof(mxArray*)); size_t *bi = (size_t*) malloc((NS+1)*sizeof(size_t*)); const char **ListOfVarNames = (const char**)malloc(NS * sizeof(char*)); bi[0] = 0; for (size_t k = 0; k < NS; k++) { size_t sz; ListOfVarNames[k] = H1+NS+33*k; switch (typlist[k]) { case 0xfb: sz = 1; break; case 0xfc: sz = 2; break; case 0xfd: sz = 4; break; case 0xfe: sz = 4; break; case 0xff: sz = 8; break; default: sz = typlist[k]; } bi[k+1] = bi[k]+sz; } // data uint8_t *data = (uint8_t *) 
malloc(bi[NS] * M); fread(data, bi[NS], M, fid); char *f = (char*)malloc(bi[NS]+1); for (size_t k = 0; k < NS; k++) { switch (typlist[k]) { case 0xfb: R[k] = mxCreateDoubleMatrix(M, 1, mxREAL); for (size_t m = 0; m < M; m++) { int8_t d = *(int8_t*)(data+bi[k]+m*bi[NS]); ((double*)mxGetData(R[k]))[m] = (d>100) ? NaN : d; } break; case 0xfc: R[k] = mxCreateDoubleMatrix(M, 1, mxREAL); if (flagSWAP) for (size_t m = 0; m < M; m++) { int16_t d = (int16_t) bswap_16(*(uint16_t*)(data+bi[k]+m*bi[NS])); ((double*)mxGetData(R[k]))[m] = (d>32740) ? NaN : d; } else for (size_t m = 0; m < M; m++) { int16_t d = *(int16_t*)(data+bi[k]+m*bi[NS]); ((double*)mxGetData(R[k]))[m] = (d>32740) ? NaN : d; } break; case 0xfd: R[k] = mxCreateDoubleMatrix(M, 1, mxREAL); if (flagSWAP) for (size_t m = 0; m < M; m++) { int32_t d = (int32_t)bswap_32(*(uint32_t*)(data+bi[k]+m*bi[NS])); ((double*)mxGetData(R[k]))[m] = (d>2147483620) ? NaN : d; } else for (size_t m = 0; m < M; m++) { int32_t d = *(int32_t*)(data+bi[k]+m*bi[NS]); ((double*)mxGetData(R[k]))[m] = (d>2147483620) ? 
NaN : d; } break; case 0xfe: R[k] = mxCreateNumericMatrix(M, 1, mxSINGLE_CLASS, mxREAL); if (flagSWAP) for (size_t m = 0; m < M; m++) { ((uint32_t*)mxGetData(R[k]))[m] = bswap_32(*(uint32_t*)(data+bi[k]+m*bi[NS]));; } else for (size_t m = 0; m < M; m++) { ((uint32_t*)mxGetData(R[k]))[m] = *(uint32_t*)(data+bi[k]+m*bi[NS]); } break; case 0xff: R[k] = mxCreateDoubleMatrix(M, 1, mxREAL); if (flagSWAP) for (size_t m = 0; m < M; m++) { ((uint64_t*)mxGetData(R[k]))[m] = bswap_64(*(uint64_t*)(data+bi[k]+m*bi[NS])); } else for (size_t m = 0; m < M; m++) { ((uint64_t*)mxGetData(R[k]))[m] = *(uint64_t*)(data+bi[k]+m*bi[NS]); } break; default: R[k] = mxCreateCellMatrix(M, 1); size_t sz = typlist[k]; for (size_t m = 0; m < M; m++) { memcpy(f, data+bi[k]+m*bi[NS], sz); f[sz] = 0; mxSetCell(R[k], m, mxCreateString(f)); } } } if (f) free(f); if (H1) free(H1); if (bi) free(bi); /* convert into output */ POutput[0] = mxCreateStructMatrix(1, 1, NS, ListOfVarNames); for (size_t k = 0; k < NS; k++) { mxSetField(POutput[0], 0, ListOfVarNames[k], R[k]); } if (ListOfVarNames) free(ListOfVarNames); } else if (H0[0]=='%' || H0[0]=='@') { /* ARFF */ uint32_t M=0; TYPE = ARFF; rewind(fid); char *H1 = NULL; count = 0; size_t ns = 0; char *vartyp = NULL; char **datestr = NULL; const char **ListOfVarNames = NULL; mxArray **R = NULL; size_t m = 0; while (!feof(fid)) { HeadLen0 = max(1024,HeadLen0*2); H1 = (char*)realloc(H1,HeadLen0); count += fread(H1+count,1,HeadLen0-count-1,fid); } H1[count] = 0; switch (H1[count-1]) { case 0x0a: case 0x0d: H1[count] = 0; break; default: H1[count] = 0x0a; } H1[count+1] = 0; char *line = strtok(H1,"\x0a\0x0d"); int status = 0; while (line) { if (!strncasecmp(line,"@relation",9)) { status = 1; } else if (status == 1 && !strncasecmp(line,"@attribute",10)) { if (ns<=NS) { ns = max(16, ns*2); ListOfVarNames = (const char**)realloc(ListOfVarNames,ns*sizeof(char*)); vartyp = (char*)realloc(vartyp,ns*sizeof(char)); R = (mxArray**) mxRealloc(R,ns*sizeof(mxArray*)); } 
size_t k = 10; char *p1, *p2; while (isspace(line[k])) k++; p1 = line+k; while (!isspace(line[k])) k++; line[k++]=0; while (isspace(line[k])) k++; p2 = line+k; ListOfVarNames[NS] = p1; if (!strncasecmp(p2,"numeric",7)) { vartyp[NS] = 1; } else if (!strncasecmp(p2,"integer",7)) { vartyp[NS] = 2; } else if (!strncasecmp(p2,"real",4)) { vartyp[NS] = 3; } else if (!strncasecmp(p2,"string",6)) { vartyp[NS] = 4; } else if (!strncasecmp(p2,"{",1)) { vartyp[NS] = 5; } else if (!strncasecmp(p2,"date",4)) { vartyp[NS] = 6; datestr = (char**)realloc(datestr,(NS+1)*sizeof(char*)); p2+=4; while (isspace(*p2)) p2++; datestr[NS] = p2; if (p2[0]==34) { p2++; while (p2[0]!=34 && p2[0]) p2++; p2[1]=0; } } else if (!strncasecmp(p2,"relational",10)) { vartyp[NS] = 7; } else vartyp[NS] = 99; NS++; } else if (status == 1 && !strncasecmp(line,"@data",5)) { status = 2; char *p = line; while (*p) p++; // goto end of current line p++; // skip \x00 M = 0; while (*p) { if (p[0]==0x0a || p[0]==0x0d) { // count number of M++; // skip next char (deals with ) p+=2; } else p++; } for (size_t k=0; k0x40 && c<0x5b)) && !u ) return(NaN); int s,e; s = *(((char*)&x) + 7) & 0x80; // sign e = (*(((char*)&x) + 7) & 0x7f) - 64; // exponent *(((char*)&x) + 7) = 0; // mantisse x #if DEBUG mexPrintf("%x %x %016Lx\n",s,e,x); #endif double y = ldexp((double)x, e*4-56); if (s) return(-y); else return( y); #endif } /* D2XPT converts from little-endian IEEE to little-endian IBM format */ uint64_t d2xpt(double x) { uint64_t s,m; int e; #if __BYTE_ORDER == __BIG_ENDIAN mexErrMsgTxt("IEEE-to-IBM conversion on big-endian platform not supported, yet"); #elif __BYTE_ORDER==__LITTLE_ENDIAN if (x != x) return(0x2eLL << 56); // NaN - not a number if (fabs(x) == 1.0/0.0) return(0x5fLL << 56); // +-infinity if (x == 0.0) return(0); if (x > 0.0) s=0; else s=1; x = frexp(x,&e); #if DEBUG mexPrintf("d2xpt(%f)\n",x); #endif // see http://old.nabble.com/Re%3A-IBM-integer-and-double-formats-p20428979.html m = *(uint64_t*) &x; 
*(((char*)&m) + 6) &= 0x0f; // if (e) *(((char*)&m) + 6) |= 0x10; // reconstruct implicit leading '1' for normalized numbers m <<= (3-(-e & 3)); *(((uint8_t*)&m) + 7) = s ? 0x80 : 0; e = (e + (-e & 3)) / 4 + 64; if (e >= 128) return(0x5f); // overflow if (e < 0) { uint64_t h = 1<<(4*-e - 1); m = m / (2*h) + (m & h && m & (3*h-1) ? 1 : 0); e = 0; } return (((uint64_t)e)<<56 | m); #endif } double tm_time2gdf_time(struct tm *t) { /* based Octave's datevec.m it referes Peter Baum's algorithm at http://vsg.cape.com/~pbaum/date/date0.htm but the link is not working anymore as of 2008-12-03. Other links to Peter Baum's algorithm are http://www.rexswain.com/b2mmddyy.rex http://www.dpwr.net/forums/index.php?s=ecfa72e38be61327403126e23aeea7e5&showtopic=4309 */ int Y,M,s; //h,m, double D; D = t->tm_mday; M = t->tm_mon+1; Y = t->tm_year+1900; // Set start of year to March by moving Jan. and Feb. to previous year. // Correct for months > 12 by moving to subsequent years. Y += (int)fix ((M-14.0)/12); const int monthstart[] = {306, 337, 0, 31, 61, 92, 122, 153, 184, 214, 245, 275}; // Lookup number of days since start of the current year. D += monthstart[t->tm_mon % 12] + 60; // Add number of days to the start of the current year. Correct // for leap year every 4 years except centuries not divisible by 400. D += 365*Y + floor (Y/4) - floor (Y/100) + floor (Y/400); // Add fraction representing current second of the day. s = t->tm_hour*3600 + t->tm_min*60 + t->tm_sec; // s -= timezone; return(D + s/86400.0); } NaN/src/linear_model_matlab.h0000664002356700235670000000217711553522126016665 0ustar schloeglschloegl/* $Id$ Copyright (c) 2007-2009 The LIBLINEAR Project. 
Copyright (c) 2010 Alois Schloegl This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ This code was extracted from liblinear-1.51 in Jan 2010 and modified for the use with Octave This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . */ #ifdef __cplusplus extern "C" { #endif const char *model_to_matlab_structure(mxArray *plhs[], struct model *model_); const char *matlab_matrix_to_model(struct model *model_, const mxArray *matlab_struct); #ifdef __cplusplus } #endif NaN/src/linear.h0000664002356700235670000000521712512552402014157 0ustar schloeglschloegl/* $Id$ Copyright (c) 2007-2009 The LIBLINEAR Project. Copyright (c) 2010 Alois Schloegl This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ This code was extracted from liblinear-1.51 in Jan 2010 and modified for the use with Octave This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . 
*/

/*
 * Public interface of the LIBLINEAR copy bundled with the NaN-toolbox:
 * data structures describing a training problem, the solver parameters and
 * a trained model, plus the entry points used by the train/predict MEX files.
 */
#ifndef _LIBLINEAR_H
#define _LIBLINEAR_H

#ifdef __cplusplus
extern "C" {
#endif

/*
 * One (index, value) entry of a sparse feature vector.
 * The MEX readers in this package (train.c, predict.c) terminate each
 * vector with an entry whose index is -1.
 */
struct feature_node
{
	int index;
	double value;
};

/*
 * Training set.
 * train.c sets l to the number of instances (columns of the column-format
 * instance matrix) and n to the number of features, plus one when a bias
 * term is appended (bias >= 0). y, x and W are allocated there with l
 * entries each.
 */
struct problem
{
	int l, n;
	int *y;				/* label of each instance */
	struct feature_node **x;	/* x[i]: sparse feature vector of instance i */
	double bias;			/* < 0 if no bias term */
	double *W;			/* instance weight */
};

/*
 * Solver identifiers; the numeric values are the ones accepted by the
 * "-s type" option documented in train.c:
 *   0 L2R_LR               L2-regularized logistic regression
 *   1 L2R_L2LOSS_SVC_DUAL  L2-regularized L2-loss SVC (dual)
 *   2 L2R_L2LOSS_SVC       L2-regularized L2-loss SVC (primal)
 *   3 L2R_L1LOSS_SVC_DUAL  L2-regularized L1-loss SVC (dual)
 *   4 MCSVM_CS             multi-class SVC by Crammer and Singer
 *   5 L1R_L2LOSS_SVC       L1-regularized L2-loss SVC
 *   6 L1R_LR               L1-regularized logistic regression
 */
enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR }; /* solver_type */

/*
 * Solver configuration.
 * weight_label[k] / weight[k] (k < nr_weight) are parallel arrays filled by
 * the "-wi weight" option, which adjusts C for class label i.
 */
struct parameter
{
	int solver_type;	/* one of the enum values above */

	/* these are for training only */
	double eps;		/* stopping criteria ("-e epsilon") */
	double C;		/* cost parameter ("-c cost") */
	int nr_weight;		/* number of entries in weight_label / weight */
	int *weight_label;
	double* weight;
};

/* Trained model as returned by train() and consumed by the predict_* calls. */
struct model
{
	struct parameter param;
	int nr_class;		/* number of classes */
	int nr_feature;
	double *w;		/* presumably the learned weight vector(s) - layout not visible here, TODO confirm */
	int *label;		/* label of each class */
	double bias;
};

/* Training and n-fold cross validation; target receives one predicted label per instance (train.c allocates it with prob.l ints). */
struct model* train(const struct problem *prob, const struct parameter *param);
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, int *target);

/* Prediction for a single sparse instance x. predict.c only enables predict_probability for L2R_LR models. */
int predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
int predict(const struct model *model_, const struct feature_node *x);
int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);

/* Model file I/O. */
int save_model(const char *model_file_name, const struct model *model_);
struct model *load_model(const char *model_file_name);

/* Accessors. */
int get_nr_feature(const struct model *model_);
int get_nr_class(const struct model *model_);
void get_labels(const struct model *model_, int* label);

/* Cleanup counterparts used by the MEX files after training / prediction. */
void destroy_model(struct model *model_);
void destroy_param(struct parameter *param);

/* Returns NULL when param is acceptable, otherwise an error message (train.c prints it). */
const char *check_parameter(const struct parameter *param);

/* Output hook; train.c points it at a no-op function for the quiet option "-q". */
extern void (*liblinear_print_string) (const char *);

#ifdef __cplusplus
}
#endif

#endif /* _LIBLINEAR_H */
NaN/src/predict.c0000664002356700235670000001742412512552402014335 0ustar schloeglschloegl/* $Id: predict.c 12779 2015-04-12 20:26:42Z schloegl $ Copyright (c) 2007-2009 The LIBLINEAR Project.
Copyright (c) 2010 Alois Schloegl This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ This code was extracted from liblinear-1.51 in Jan 2010 and modified for the use with Octave This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . */ #include #include #include #include "linear.h" #include "mex.h" #include "linear_model_matlab.h" #ifdef tmwtypes_h #if (MX_API_VER<=0x07020000) typedef int mwSize; #endif #endif #define CMD_LEN 2048 #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) int col_format_flag; void read_sparse_instance(const mxArray *prhs, int index, struct feature_node *x, int feature_number, double bias) { int i, j, low, high; mwIndex *ir, *jc; double *samples; ir = mxGetIr(prhs); jc = mxGetJc(prhs); samples = mxGetPr(prhs); // each column is one instance j = 0; low = (int) jc[index], high = (int) jc[index+1]; for(i=low; i=0) { x[j].index = feature_number+1; x[j].value = bias; j++; } x[j].index = -1; } static void fake_answer(mxArray *plhs[]) { plhs[0] = mxCreateDoubleMatrix(0, 0, mxREAL); plhs[1] = mxCreateDoubleMatrix(0, 0, mxREAL); plhs[2] = mxCreateDoubleMatrix(0, 0, mxREAL); } void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag) { int label_vector_row_num, label_vector_col_num; int feature_number, testing_instance_number; int instance_index; double *ptr_instance, *ptr_label, *ptr_predict_label; double *ptr_prob_estimates, *ptr_dec_values, 
*ptr; struct feature_node *x; mxArray *pplhs[1]; // instance sparse matrix in row format int correct = 0; int total = 0; int nr_class=get_nr_class(model_); int nr_w; double *prob_estimates=NULL; if(nr_class==2 && model_->param.solver_type!=MCSVM_CS) nr_w=1; else nr_w=nr_class; // prhs[1] = testing instance matrix feature_number = get_nr_feature(model_); testing_instance_number = (int) mxGetM(prhs[1]); if(col_format_flag) { feature_number = (int) mxGetM(prhs[1]); testing_instance_number = (int) mxGetN(prhs[1]); } label_vector_row_num = (int) mxGetM(prhs[0]); label_vector_col_num = (int) mxGetN(prhs[0]); if(label_vector_row_num!=testing_instance_number) { mexPrintf("Length of label vector does not match # of instances.\n"); fake_answer(plhs); return; } if(label_vector_col_num!=1) { mexPrintf("label (1st argument) should be a vector (# of column is 1).\n"); fake_answer(plhs); return; } ptr_instance = mxGetPr(prhs[1]); ptr_label = mxGetPr(prhs[0]); // transpose instance matrix if(mxIsSparse(prhs[1])) { if(col_format_flag) { pplhs[0] = (mxArray *)prhs[1]; } else { mxArray *pprhs[1]; pprhs[0] = mxDuplicateArray(prhs[1]); if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose")) { mexPrintf("Error: cannot transpose testing instance matrix\n"); fake_answer(plhs); return; } } } else mexPrintf("Testing_instance_matrix must be sparse\n"); prob_estimates = Malloc(double, nr_class); plhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL); if(predict_probability_flag) plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL); else plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_w, mxREAL); ptr_predict_label = mxGetPr(plhs[0]); ptr_prob_estimates = mxGetPr(plhs[2]); ptr_dec_values = mxGetPr(plhs[2]); x = Malloc(struct feature_node, feature_number+2); for(instance_index=0;instance_indexbias); if(predict_probability_flag) { v = predict_probability(model_, x, prob_estimates); ptr_predict_label[instance_index] = v; for(i=0;i 5 || nrhs < 3) { 
exit_with_help(); fake_answer(plhs); return; } if(nrhs == 5) { mxGetString(prhs[4], cmd, mxGetN(prhs[4])+1); if(strcmp(cmd, "col") == 0) { col_format_flag = 1; } } if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) { mexPrintf("Error: label vector and instance matrix must be double\n"); fake_answer(plhs); return; } if(mxIsStruct(prhs[2])) { const char *error_msg; // parse options if(nrhs>=4) { int i, argc = 1; char *argv[CMD_LEN/2]; // put options in argv[] mxGetString(prhs[3], cmd, mxGetN(prhs[3]) + 1); if((argv[argc] = strtok(cmd, " ")) != NULL) while((argv[++argc] = strtok(NULL, " ")) != NULL) ; for(i=1;i=argc) { exit_with_help(); fake_answer(plhs); return; } switch(argv[i-1][1]) { case 'b': prob_estimate_flag = atoi(argv[i]); break; default: mexPrintf("unknown option\n"); exit_with_help(); fake_answer(plhs); return; } } } model_ = Malloc(struct model, 1); error_msg = matlab_matrix_to_model(model_, prhs[2]); if(error_msg) { mexPrintf("Error: can't read model: %s\n", error_msg); destroy_model(model_); fake_answer(plhs); return; } if(prob_estimate_flag) { if(model_->param.solver_type!=L2R_LR) { mexPrintf("probability output is only supported for logistic regression\n"); prob_estimate_flag=0; } } if(mxIsSparse(prhs[1])) do_predict(plhs, prhs, model_, prob_estimate_flag); else { mexPrintf("Testing_instance_matrix must be sparse\n"); fake_answer(plhs); } // destroy model_ destroy_model(model_); } else { mexPrintf("model file should be a struct array\n"); fake_answer(plhs); } return; } NaN/src/covm_mex.cpp0000664002356700235670000004617512540557475015105 0ustar schloeglschloegl/* //------------------------------------------------------------------- // C-MEX implementation of COVM - this function is part of the NaN-toolbox. 
// // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, see . // // // covm: in-product of matrices, NaN are skipped. // usage: // [cc,nn] = covm_mex(X,Y,flag,W); // // Input: // - X: // - Y: [optional], if empty, Y=X; // - flag: if not empty, it is set to 1 if some NaN was observed // - W: weight vector to compute weighted correlation // // Output: // - CC = X' * sparse(diag(W)) * Y while NaN's are skipped // - NN = real(~isnan(X)')*sparse(diag(W))*real(~isnan(Y)) count of valid (non-NaN) elements // computed more efficiently // // $Id: covm_mex.cpp 12826 2015-06-18 15:09:49Z schloegl $ // Copyright (C) 2009,2010,2011 Alois Schloegl // This function is part of the NaN-toolbox // http://pub.ist.ac.at/~schloegl/matlab/NaN/ // //------------------------------------------------------------------- */ #ifdef __GNUC__ #include #endif #include #include "mex.h" /*#define NO_FLAG*/ void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) { double *X0=NULL, *Y0=NULL, *W=NULL; double *CC; double *NN = NULL; size_t rX,cX,rY,cY; size_t i; char flag_isNaN = 0; int ACC_LEVEL; /*********** check input arguments *****************/ // check for proper number of input and output arguments if ((PInputCount <= 0) || (PInputCount > 5)) { mexPrintf("usage: [CC,NN] = covm_mex(X [,Y [,flag [,W [,'E']]]])\n\n"); mexPrintf("Do not use COVM_MEX directly, use COVM instead. 
\n"); /* mexPrintf("\nCOVM_MEX computes the covariance matrix of real matrices and skips NaN's\n"); mexPrintf("\t[CC,NN] = covm_mex(...)\n\t\t computes CC=X'*Y, NN contains the number of not-NaN elements\n"); mexPrintf("\t\t CC./NN is the unbiased covariance matrix\n"); mexPrintf("\t... = covm_mex(X,Y,...)\n\t\t computes CC=X'*sparse(diag(W))*Y, number of rows of X and Y must match\n"); mexPrintf("\t... = covm_mex(X,[], ...)\n\t\t computes CC=X'*sparse(diag(W))*X\n"); mexPrintf("\t... = covm_mex(...,flag,...)\n\t\t if flag is not empty, it is set to 1 if some NaN occured in X or Y\n"); mexPrintf("\t... = covm_mex(...,W)\n\t\t W to compute weighted covariance, number of elements must match the number of rows of X\n"); mexPrintf("\t\t if isempty(W), all weights are 1\n"); mexPrintf("\t[CC,NN]=covm_mex(X,Y,flag,W)\n"); */ return; } if (POutputCount > 2) mexErrMsgTxt("covm.MEX has 1 to 2 output arguments."); // get 1st argument if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0]) && !mxIsSparse(PInputs[0]) ) X0 = mxGetPr(PInputs[0]); else mexErrMsgTxt("First argument must be non-sparse REAL/DOUBLE."); rX = mxGetM(PInputs[0]); cX = mxGetN(PInputs[0]); // get 2nd argument if (PInputCount > 1) { if (!mxGetNumberOfElements(PInputs[1])) ; // Y0 = NULL; else if (mxIsDouble(PInputs[1]) && !mxIsComplex(PInputs[1])) Y0 = mxGetPr(PInputs[1]); else mexErrMsgTxt("Second argument must be REAL/DOUBLE."); } // get weight vector for weighted sumskipnan if (PInputCount > 3) { // get 4th argument size_t nW = mxGetNumberOfElements(PInputs[3]); if (!nW) ; else if (nW == rX) W = mxGetPr(PInputs[3]); else mexErrMsgTxt("number of elements in W must match numbers of rows in X"); } #ifdef __GNUC__ ACC_LEVEL = 0; { mxArray *LEVEL = NULL; int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); if (!s) { ACC_LEVEL = (int) mxGetScalar(LEVEL); } mxDestroyArray(LEVEL); } // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); #endif if (Y0==NULL) { Y0 = X0; rY = rX; cY = cX; } else { rY = 
mxGetM(PInputs[1]); cY = mxGetN(PInputs[1]); } if (rX != rY) mexErrMsgTxt("number of rows in X and Y do not match"); /*********** create output arguments *****************/ POutput[0] = mxCreateDoubleMatrix(cX, cY, mxREAL); CC = mxGetPr(POutput[0]); if (POutputCount > 1) { POutput[1] = mxCreateDoubleMatrix(cX, cY, mxREAL); NN = mxGetPr(POutput[1]); } /*********** compute covariance *****************/ #if 0 /*------ version 1 --------------------- this solution is slower than the alternative solution below for transposed matrices, this might be faster. */ for (k=0; k 2) && mxGetNumberOfElements(PInputs[2])) { // set FLAG_NANS_OCCURED switch (mxGetClassID(PInputs[2])) { case mxDOUBLE_CLASS: *(double*)mxGetData(PInputs[2]) = 1.0; break; case mxSINGLE_CLASS: *(float*)mxGetData(PInputs[2]) = 1.0; break; case mxLOGICAL_CLASS: case mxCHAR_CLASS: case mxINT8_CLASS: case mxUINT8_CLASS: *(char*)mxGetData(PInputs[2]) = 1; break; #ifdef __GNUC__ case mxINT16_CLASS: case mxUINT16_CLASS: *(uint16_t*)mxGetData(PInputs[2]) = 1; break; case mxINT32_CLASS: case mxUINT32_CLASS: *(uint32_t*)mxGetData(PInputs[2])= 1; break; case mxINT64_CLASS: case mxUINT64_CLASS: *(uint64_t*)mxGetData(PInputs[2]) = 1; break; case mxFUNCTION_CLASS: case mxUNKNOWN_CLASS: case mxCELL_CLASS: case mxSTRUCT_CLASS: #endif default: mexPrintf("Type of 3rd input argument cannot be used to return status of NaN occurence."); } } #endif #endif } NaN/src/linear_model_matlab.c0000664002356700235670000001074611553522126016661 0ustar schloeglschloegl/* $Id: linear_model_matlab.c 8223 2011-04-20 09:16:06Z schloegl $ Copyright (c) 2007-2009 The LIBLINEAR Project. 
Copyright (c) 2010 Alois Schloegl This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ This code was extracted from liblinear-1.51 in Jan 2010 and modified for the use with Octave This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . */ #include #include #include "linear.h" #include "mex.h" #ifdef tmwtypes_h #if (MX_API_VER<=0x07020000) typedef int mwSize; #endif #endif #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) #define NUM_OF_RETURN_FIELD 6 static const char *field_names[] = { "Parameters", "nr_class", "nr_feature", "bias", "Label", "w", }; #ifdef __cplusplus extern "C" { #endif const char *model_to_matlab_structure(mxArray *plhs[], struct model *model_) { int i; int nr_w; double *ptr; mxArray *return_model, **rhs; int out_id = 0; int n, w_size; rhs = (mxArray **)mxMalloc(sizeof(mxArray *)*NUM_OF_RETURN_FIELD); /* Parameters */ /* for now, only solver_type is needed */ rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); ptr = mxGetPr(rhs[out_id]); ptr[0] = model_->param.solver_type; out_id++; /* nr_class */ rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); ptr = mxGetPr(rhs[out_id]); ptr[0] = model_->nr_class; out_id++; if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) nr_w=1; else nr_w=model_->nr_class; /* nr_feature */ rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); ptr = mxGetPr(rhs[out_id]); ptr[0] = model_->nr_feature; out_id++; /* bias */ rhs[out_id] = mxCreateDoubleMatrix(1, 1, 
mxREAL); ptr = mxGetPr(rhs[out_id]); ptr[0] = model_->bias; out_id++; if(model_->bias>=0) n=model_->nr_feature+1; else n=model_->nr_feature; w_size = n; /* Label */ if(model_->label) { rhs[out_id] = mxCreateDoubleMatrix(model_->nr_class, 1, mxREAL); ptr = mxGetPr(rhs[out_id]); for(i = 0; i < model_->nr_class; i++) ptr[i] = model_->label[i]; } else rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL); out_id++; /* w */ rhs[out_id] = mxCreateDoubleMatrix(nr_w, w_size, mxREAL); ptr = mxGetPr(rhs[out_id]); for(i = 0; i < w_size*nr_w; i++) ptr[i]=model_->w[i]; out_id++; /* Create a struct matrix contains NUM_OF_RETURN_FIELD fields */ return_model = mxCreateStructMatrix(1, 1, NUM_OF_RETURN_FIELD, field_names); /* Fill struct matrix with input arguments */ for(i = 0; i < NUM_OF_RETURN_FIELD; i++) mxSetField(return_model,0,field_names[i],mxDuplicateArray(rhs[i])); /* return */ plhs[0] = return_model; mxFree(rhs); return NULL; } const char *matlab_matrix_to_model(struct model *model_, const mxArray *matlab_struct) { int i, num_of_fields; int nr_w; double *ptr; int id = 0; int n, w_size; mxArray **rhs; num_of_fields = mxGetNumberOfFields(matlab_struct); rhs = (mxArray **) mxMalloc(sizeof(mxArray *)*num_of_fields); for(i=0;inr_class=0; nr_w=0; model_->nr_feature=0; model_->w=NULL; model_->label=NULL; /* Parameters */ ptr = mxGetPr(rhs[id]); model_->param.solver_type = (int)ptr[0]; id++; /* nr_class */ ptr = mxGetPr(rhs[id]); model_->nr_class = (int)ptr[0]; id++; if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) nr_w=1; else nr_w=model_->nr_class; /* nr_feature */ ptr = mxGetPr(rhs[id]); model_->nr_feature = (int)ptr[0]; id++; /* bias */ ptr = mxGetPr(rhs[id]); model_->bias = (int)ptr[0]; id++; if(model_->bias>=0) n=model_->nr_feature+1; else n=model_->nr_feature; w_size = n; ptr = mxGetPr(rhs[id]); model_->label=Malloc(int, model_->nr_class); for(i=0; inr_class; i++) model_->label[i]=(int)ptr[i]; id++; ptr = mxGetPr(rhs[id]); model_->w=Malloc(double, w_size*nr_w); 
for(i = 0; i < w_size*nr_w; i++) model_->w[i]=ptr[i]; id++; mxFree(rhs); return NULL; } #ifdef __cplusplus } #endif NaN/src/make.m0000775002356700235670000000435411553522126013637 0ustar schloeglschloeglfunction make(arg1) % This make.m is used for Matlab under Windows % $Id: make.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2010,2011 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % add -largeArrayDims on 64-bit machines if (nargin>0 && strcmp(arg1,'clean')), if strcmp(computer,'PCWIN') dos('del *.obj'); dos('del *.mex*'); else unix('rm *.o'); unix('rm *.mex*'); end; return; end; mex covm_mex.cpp mex sumskipnan_mex.cpp mex histo_mex.cpp mex kth_element.cpp mex str2array.cpp mex xptopen.cpp mex -c svm.cpp mex -c svm_model_matlab.c mex -c tron.cpp mex -c linear.cpp mex -c linear_model_matlab.c if strcmp(computer,'PCWIN') && ~exist('OCTAVE_VERSION','builtin'), mex svmtrain_mex.cpp svm.obj svm_model_matlab.obj mex svmpredict_mex.cpp svm.obj svm_model_matlab.obj if ~exist('LAPACK/daxpy.f','file') || ~exist('LAPACK/ddot.f','file') || ~exist('LAPACK/dscal.f','file') || ~exist('LAPACK/dnrm2.f','file'), fprintf(1,'The lapack functions daxpy, ddot, dscal, and dnrm2 are required.\n'); fprintf(1,'If some functions are missing, get them from here:\n'); if ~exist('LAPACK','dir') mkdir('LAPACK'); end; fprintf(1,'Get http://www.netlib.org/blas/daxpy.f and save to %s',fullfile(pwd,'LAPACK')); fprintf(1,'Get http://www.netlib.org/blas/ddot.f and save to %s',fullfile(pwd,'LAPACK')); fprintf(1,'Get http://www.netlib.org/blas/dscal.f and save to %s',fullfile(pwd,'LAPACK')); fprintf(1,'Get http://www.netlib.org/blas/dnrm2.f and save to %s',fullfile(pwd,'LAPACK')); fprintf(1,'Press any key to continue ... 
'\n); pause; end; mex -c LAPACK/daxpy.f mex -c LAPACK/ddot.f mex -c LAPACK/dscal.f mex -c LAPACK/dnrm2.f dos('copy train.c train.cpp'); mex('train.cpp','tron.obj','linear.obj','linear_model_matlab.obj','daxpy.obj','ddot.obj','dscal.obj','dnrm2.obj') dos('del *.obj'); else mex svmtrain_mex.cpp svm.o svm_model_matlab.o mex svmpredict_mex.cpp svm.o svm_model_matlab.o unix('cp train.c train.cpp'); mex train.cpp tron.o linear.o linear_model_matlab.o unix('rm *.o'); end NaN/src/tron.cpp0000664002356700235670000001376711553522126014237 0ustar schloeglschloegl/* $Id$ Copyright (c) 2007-2009 The LIBLINEAR Project. Copyright (c) 2010 Alois Schloegl This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ This code was extracted from liblinear-1.51 in Jan 2010 and modified for the use with Octave This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . */ #include #include #include #include #include "tron.h" #ifndef min template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } #endif #ifdef __cplusplus extern "C" { #endif extern double dnrm2_(int *, double *, int *); extern double ddot_(int *, double *, int *, double *, int *); extern int daxpy_(int *, double *, double *, int *, double *, int *); extern int dscal_(int *, double *, double *, int *); #ifdef __cplusplus } #endif static void default_print(const char *buf) { fputs(buf,stdout); fflush(stdout); } void TRON::info(const char *fmt,...) 
{ char buf[BUFSIZ]; va_list ap; va_start(ap,fmt); vsprintf(buf,fmt,ap); va_end(ap); (*tron_print_string)(buf); } TRON::TRON(const function *fun_obj, double eps, int max_iter) { this->fun_obj=const_cast(fun_obj); this->eps=eps; this->max_iter=max_iter; tron_print_string = default_print; } TRON::~TRON() { } void TRON::tron(double *w) { // Parameters for updating the iterates. double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75; // Parameters for updating the trust region size delta. double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4; int n = fun_obj->get_nr_variable(); int i, cg_iter; double delta, snorm, one=1.0; double alpha, f, fnew, prered, actred, gs; int search = 1, iter = 1, inc = 1; double *s = new double[n]; double *r = new double[n]; double *w_new = new double[n]; double *g = new double[n]; for (i=0; ifun(w); fun_obj->grad(w, g); delta = dnrm2_(&n, g, &inc); double gnorm1 = delta; double gnorm = gnorm1; if (gnorm <= eps*gnorm1) search = 0; iter = 1; while (iter <= max_iter && search) { cg_iter = trcg(delta, g, s, r); memcpy(w_new, w, sizeof(double)*n); daxpy_(&n, &one, s, &inc, w_new, &inc); gs = ddot_(&n, g, &inc, s, &inc); prered = -0.5*(gs-ddot_(&n, s, &inc, r, &inc)); fnew = fun_obj->fun(w_new); // Compute the actual reduction. actred = f - fnew; // On the first iteration, adjust the initial step bound. snorm = dnrm2_(&n, s, &inc); if (iter == 1) delta = min(delta, snorm); // Compute prediction alpha*snorm of the step. if (fnew - f - gs <= 0) alpha = sigma3; else alpha = max(sigma1, -0.5*(gs/(fnew - f - gs))); // Update the trust region bound according to the ratio of actual to predicted reduction. 
if (actred < eta0*prered) delta = min(max(alpha, sigma1)*snorm, sigma2*delta); else if (actred < eta1*prered) delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta)); else if (actred < eta2*prered) delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta)); else delta = max(delta, min(alpha*snorm, sigma3*delta)); info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter); if (actred > eta0*prered) { iter++; memcpy(w, w_new, sizeof(double)*n); f = fnew; fun_obj->grad(w, g); gnorm = dnrm2_(&n, g, &inc); if (gnorm <= eps*gnorm1) break; } if (f < -1.0e+32) { info("warning: f < -1.0e+32\n"); break; } if (fabs(actred) <= 0 && prered <= 0) { info("warning: actred and prered <= 0\n"); break; } if (fabs(actred) <= 1.0e-12*fabs(f) && fabs(prered) <= 1.0e-12*fabs(f)) { info("warning: actred and prered too small\n"); break; } } delete[] g; delete[] r; delete[] w_new; delete[] s; } int TRON::trcg(double delta, double *g, double *s, double *r) { int i, inc = 1; int n = fun_obj->get_nr_variable(); double one = 1; double *d = new double[n]; double *Hd = new double[n]; double rTr, rnewTrnew, alpha, beta, cgtol; for (i=0; iHv(d, Hd); alpha = rTr/ddot_(&n, d, &inc, Hd, &inc); daxpy_(&n, &alpha, d, &inc, s, &inc); if (dnrm2_(&n, s, &inc) > delta) { info("cg reaches trust region boundary\n"); alpha = -alpha; daxpy_(&n, &alpha, d, &inc, s, &inc); double std = ddot_(&n, s, &inc, d, &inc); double sts = ddot_(&n, s, &inc, s, &inc); double dtd = ddot_(&n, d, &inc, d, &inc); double dsq = delta*delta; double rad = sqrt(std*std + dtd*(dsq-sts)); if (std >= 0) alpha = (dsq - sts)/(std + rad); else alpha = (rad - std)/dtd; daxpy_(&n, &alpha, d, &inc, s, &inc); alpha = -alpha; daxpy_(&n, &alpha, Hd, &inc, r, &inc); break; } alpha = -alpha; daxpy_(&n, &alpha, Hd, &inc, r, &inc); rnewTrnew = ddot_(&n, r, &inc, r, &inc); beta = rnewTrnew/rTr; dscal_(&n, &beta, d, &inc); daxpy_(&n, &one, r, &inc, d, &inc); rTr = rnewTrnew; } delete[] 
d; delete[] Hd; return(cg_iter); } double TRON::norm_inf(int n, double *x) { double dmax = fabs(x[0]); for (int i=1; i= dmax) dmax = fabs(x[i]); return(dmax); } void TRON::set_print_string(void (*print_string) (const char *buf)) { tron_print_string = print_string; } NaN/src/linear.cpp0000664002356700235670000012372412512552402014516 0ustar schloeglschloegl/* $Id$ Copyright (c) 2007-2009 The LIBLINEAR Project. Copyright (c) 2010 Alois Schloegl This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ This code was extracted from liblinear-1.51 in Jan 2010 and modified for the use with Octave This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . */ #include #include #include #include #include #include "linear.h" #include "tron.h" typedef signed char schar; template static inline void swap(T& x, T& y) { T t=x; x=y; y=t; } #ifndef min template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } #endif template static inline void clone(T*& dst, S* src, int n) { dst = new T[n]; memcpy((void *)dst,(void *)src,sizeof(T)*n); } #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) #define INF HUGE_VAL static void print_string_stdout(const char *s) { fputs(s,stdout); fflush(stdout); } void (*liblinear_print_string) (const char *) = &print_string_stdout; #if 1 static void info(const char *fmt,...) 
{ char buf[BUFSIZ]; va_list ap; va_start(ap,fmt); vsprintf(buf,fmt,ap); va_end(ap); (*liblinear_print_string)(buf); } #else static void info(const char *fmt,...) {} #endif class l2r_lr_fun : public function { public: l2r_lr_fun(const problem *prob, double Cp, double Cn); ~l2r_lr_fun(); double fun(double *w); void grad(double *w, double *g); void Hv(double *s, double *Hs); int get_nr_variable(void); private: void Xv(double *v, double *Xv); void XTv(double *v, double *XTv); double *C; double *z; double *D; const problem *prob; }; l2r_lr_fun::l2r_lr_fun(const problem *prob, double Cp, double Cn) { int i; int l=prob->l; int *y=prob->y; this->prob = prob; z = new double[l]; D = new double[l]; C = new double[l]; for (i=0; iW[i] * Cp; else C[i] = prob->W[i] * Cn; } } l2r_lr_fun::~l2r_lr_fun() { delete[] z; delete[] D; delete[] C; } double l2r_lr_fun::fun(double *w) { int i; double f=0; int *y=prob->y; int l=prob->l; int w_size=get_nr_variable(); Xv(w, z); for(i=0;i= 0) f += C[i]*log(1 + exp(-yz)); else f += C[i]*(-yz+log(1 + exp(yz))); } f = 2*f; for(i=0;iy; int l=prob->l; int w_size=get_nr_variable(); for(i=0;in; } void l2r_lr_fun::Hv(double *s, double *Hs) { int i; int l=prob->l; int w_size=get_nr_variable(); double *wa = new double[l]; Xv(s, wa); for(i=0;il; feature_node **x=prob->x; for(i=0;iindex!=-1) { Xv[i]+=v[s->index-1]*s->value; s++; } } } void l2r_lr_fun::XTv(double *v, double *XTv) { int i; int l=prob->l; int w_size=get_nr_variable(); feature_node **x=prob->x; for(i=0;iindex!=-1) { XTv[s->index-1]+=v[i]*s->value; s++; } } } class l2r_l2_svc_fun : public function { public: l2r_l2_svc_fun(const problem *prob, double Cp, double Cn); ~l2r_l2_svc_fun(); double fun(double *w); void grad(double *w, double *g); void Hv(double *s, double *Hs); int get_nr_variable(void); private: void Xv(double *v, double *Xv); void subXv(double *v, double *Xv); void subXTv(double *v, double *XTv); double *C; double *z; double *D; int *I; int sizeI; const problem *prob; }; 
l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double Cp, double Cn) { int i; int l=prob->l; int *y=prob->y; this->prob = prob; z = new double[l]; D = new double[l]; C = new double[l]; I = new int[l]; for (i=0; iW[i] * Cp; else C[i] = prob->W[i] * Cn; } } l2r_l2_svc_fun::~l2r_l2_svc_fun() { delete[] z; delete[] D; delete[] C; delete[] I; } double l2r_l2_svc_fun::fun(double *w) { int i; double f=0; int *y=prob->y; int l=prob->l; int w_size=get_nr_variable(); Xv(w, z); for(i=0;i 0) f += C[i]*d*d; } f = 2*f; for(i=0;iy; int l=prob->l; int w_size=get_nr_variable(); sizeI = 0; for (i=0;in; } void l2r_l2_svc_fun::Hv(double *s, double *Hs) { int i; int l=prob->l; int w_size=get_nr_variable(); double *wa = new double[l]; subXv(s, wa); for(i=0;il; feature_node **x=prob->x; for(i=0;iindex!=-1) { Xv[i]+=v[s->index-1]*s->value; s++; } } } void l2r_l2_svc_fun::subXv(double *v, double *Xv) { int i; feature_node **x=prob->x; for(i=0;iindex!=-1) { Xv[i]+=v[s->index-1]*s->value; s++; } } } void l2r_l2_svc_fun::subXTv(double *v, double *XTv) { int i; int w_size=get_nr_variable(); feature_node **x=prob->x; for(i=0;iindex!=-1) { XTv[s->index-1]+=v[i]*s->value; s++; } } } // A coordinate descent algorithm for // multi-class support vector machines by Crammer and Singer // // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i // s.t. 
\alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i // // where e^m_i = 0 if y_i = m, // e^m_i = 1 if y_i != m, // C^m_i = C if m = y_i, // C^m_i = 0 if m != y_i, // and w_m(\alpha) = \sum_i \alpha^m_i x_i // // Given: // x, y, C // eps is the stopping tolerance // // solution will be put in w #define GETI(i) (i) // To support weights for instances, use GETI(i) (i) class Solver_MCSVM_CS { public: Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000); ~Solver_MCSVM_CS(); void Solve(double *w); private: void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new); bool be_shrunk(int i, int m, int yi, double alpha_i, double minG); double *B, *C, *G; int w_size, l; int nr_class; int max_iter; double eps; const problem *prob; }; Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter) { this->w_size = prob->n; this->l = prob->l; this->nr_class = nr_class; this->eps = eps; this->max_iter = max_iter; this->prob = prob; this->B = new double[nr_class]; this->G = new double[nr_class]; this->C = new double[prob->l]; for(int i = 0; i < prob->l; i++) this->C[i] = prob->W[i] * weighted_C[prob->y[i]]; } Solver_MCSVM_CS::~Solver_MCSVM_CS() { delete[] B; delete[] G; delete[] C; } int compare_double(const void *a, const void *b) { if(*(double *)a > *(double *)b) return -1; if(*(double *)a < *(double *)b) return 1; return 0; } void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new) { int r; double *D; clone(D, B, active_i); if(yi < active_i) D[yi] += A_i*C_yi; qsort(D, active_i, sizeof(double), compare_double); double beta = D[0] - A_i*C_yi; for(r=1;rx[i]; QD[i] = 0; while(xi->index != -1) { QD[i] += (xi->value)*(xi->value); xi++; } active_size_i[i] = nr_class; y_index[i] = prob->y[i]; index[i] = i; } while(iter < max_iter) { double stopping = -INF; for(i=0;i 0) { for(m=0;mx[i]; 
while(xi->index!= -1) { double *w_i = &w[(xi->index-1)*nr_class]; for(m=0;mvalue); xi++; } double minG = INF; double maxG = -INF; for(m=0;m maxG) maxG = G[m]; } if(y_index[i] < active_size_i[i]) if(alpha_i[prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG) minG = G[y_index[i]]; for(m=0;mm) { if(!be_shrunk(i, active_size_i[i], y_index[i], alpha_i[alpha_index_i[active_size_i[i]]], minG)) { swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]); swap(G[m], G[active_size_i[i]]); if(y_index[i] == active_size_i[i]) y_index[i] = m; else if(y_index[i] == m) y_index[i] = active_size_i[i]; break; } active_size_i[i]--; } } } if(active_size_i[i] <= 1) { active_size--; swap(index[s], index[active_size]); s--; continue; } if(maxG-minG <= 1e-12) continue; else stopping = max(maxG - minG, stopping); for(m=0;m= 1e-12) { d_ind[nz_d] = alpha_index_i[m]; d_val[nz_d] = d; nz_d++; } } xi = prob->x[i]; while(xi->index != -1) { double *w_i = &w[(xi->index-1)*nr_class]; for(m=0;mvalue; xi++; } } } iter++; if(iter % 10 == 0) { info("."); } if(stopping < eps_shrink) { if(stopping < eps && start_from_all == true) break; else { active_size = l; for(i=0;i= max_iter) info("Warning: reaching max number of iterations\n"); // calculate objective value double v = 0; int nSV = 0; for(i=0;i 0) nSV++; } for(i=0;iy[i]]; info("Objective value = %lf\n",v); info("nSV = %d\n",nSV); delete [] alpha; delete [] alpha_new; delete [] index; delete [] QD; delete [] d_ind; delete [] d_val; delete [] alpha_index; delete [] y_index; delete [] active_size_i; } // A coordinate descent algorithm for // L1-loss and L2-loss SVM dual problems // // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha, // s.t. 
0 <= alpha_i <= upper_bound_i, // // where Qij = yi yj xi^T xj and // D is a diagonal matrix // // In L1-SVM case: // upper_bound_i = Cp if y_i = 1 // upper_bound_i = Cn if y_i = -1 // D_ii = 0 // In L2-SVM case: // upper_bound_i = INF // D_ii = 1/(2*Cp) if y_i = 1 // D_ii = 1/(2*Cn) if y_i = -1 // // Given: // x, y, Cp, Cn // eps is the stopping tolerance // // solution will be put in w #undef GETI #define GETI(i) (i) // To support weights for instances, use GETI(i) (i) static void solve_l2r_l1l2_svc( const problem *prob, double *w, double eps, double Cp, double Cn, int solver_type) { int l = prob->l; int w_size = prob->n; int i, s, iter = 0; double C, d, G; double *QD = new double[l]; int max_iter = 1000; int *index = new int[l]; double *alpha = new double[l]; schar *y = new schar[l]; int active_size = l; // PG: projected gradient, for shrinking and stopping double PG; double PGmax_old = INF; double PGmin_old = -INF; double PGmax_new, PGmin_new; // default solver_type: L2R_L2LOSS_SVC_DUAL double *diag = new double[l]; double *upper_bound = new double[l]; double *C_ = new double[l]; for(i=0; iy[i]>0) C_[i] = prob->W[i] * Cp; else C_[i] = prob->W[i] * Cn; diag[i] = 0.5/C_[i]; upper_bound[i] = INF; } if(solver_type == L2R_L1LOSS_SVC_DUAL) { for(i=0; iy[i] > 0) { y[i] = +1; } else { y[i] = -1; } QD[i] = diag[GETI(i)]; feature_node *xi = prob->x[i]; while (xi->index != -1) { QD[i] += (xi->value)*(xi->value); xi++; } index[i] = i; } while (iter < max_iter) { PGmax_new = -INF; PGmin_new = INF; for (i=0; ix[i]; while(xi->index!= -1) { G += w[xi->index-1]*(xi->value); xi++; } G = G*yi-1; C = upper_bound[GETI(i)]; G += alpha[i]*diag[GETI(i)]; PG = 0; if (alpha[i] == 0) { if (G > PGmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G < 0) PG = G; } else if (alpha[i] == C) { if (G < PGmin_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G > 0) PG = G; } else PG = G; PGmax_new = max(PGmax_new, PG); 
PGmin_new = min(PGmin_new, PG); if(fabs(PG) > 1.0e-12) { double alpha_old = alpha[i]; alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C); d = (alpha[i] - alpha_old)*yi; xi = prob->x[i]; while (xi->index != -1) { w[xi->index-1] += d*xi->value; xi++; } } } iter++; if(iter % 10 == 0) info("."); if(PGmax_new - PGmin_new <= eps) { if(active_size == l) break; else { active_size = l; info("*"); PGmax_old = INF; PGmin_old = -INF; continue; } } PGmax_old = PGmax_new; PGmin_old = PGmin_new; if (PGmax_old <= 0) PGmax_old = INF; if (PGmin_old >= 0) PGmin_old = -INF; } info("\noptimization finished, #iter = %d\n",iter); if (iter >= max_iter) info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n"); // calculate objective value double v = 0; int nSV = 0; for(i=0; i 0) ++nSV; } info("Objective value = %lf\n",v/2); info("nSV = %d\n",nSV); delete [] upper_bound; delete [] diag; delete [] C_; delete [] QD; delete [] alpha; delete [] y; delete [] index; } // A coordinate descent algorithm for // L1-regularized L2-loss support vector classification // // min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2, // // Given: // x, y, Cp, Cn // eps is the stopping tolerance // // solution will be put in w #undef GETI #define GETI(i) (i) // To support weights for instances, use GETI(i) (i) static void solve_l1r_l2_svc( problem *prob_col, double *w, double eps, double Cp, double Cn) { int l = prob_col->l; int w_size = prob_col->n; int j, s, iter = 0; int max_iter = 1000; int active_size = w_size; int max_num_linesearch = 20; double sigma = 0.01; double d, G_loss, G, H; double Gmax_old = INF; double Gmax_new; double Gmax_init = 0.0; double d_old, d_diff; double loss_old, loss_new; double appxcond, cond; int *index = new int[w_size]; schar *y = new schar[l]; double *b = new double[l]; // b = 1-ywTx double *xj_sq = new double[w_size]; feature_node *x; double *C = new double[l]; for(j=0; jy[j] > 0) { y[j] = 1; C[j] = prob_col->W[j] * Cp; } else { y[j] = -1; C[j] = 
prob_col->W[j] * Cn; } } for(j=0; jx[j]; while(x->index != -1) { int ind = x->index-1; double val = x->value; x->value *= y[ind]; // x->value stores yi*xij xj_sq[j] += C[GETI(ind)]*val*val; x++; } } while(iter < max_iter) { Gmax_new = 0; for(j=0; jx[j]; while(x->index != -1) { int ind = x->index-1; if(b[ind] > 0) { double val = x->value; double tmp = C[GETI(ind)]*val; G_loss -= tmp*b[ind]; H += tmp*val; } x++; } G_loss *= 2; G = G_loss; H *= 2; H = max(H, 1e-12); double Gp = G+1; double Gn = G-1; double violation = 0; if(w[j] == 0) { if(Gp < 0) violation = -Gp; else if(Gn > 0) violation = Gn; else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(w[j] > 0) violation = fabs(Gp); else violation = fabs(Gn); Gmax_new = max(Gmax_new, violation); // obtain Newton direction d if(Gp <= H*w[j]) d = -Gp/H; else if(Gn >= H*w[j]) d = -Gn/H; else d = -w[j]; if(fabs(d) < 1.0e-12) continue; double delta = fabs(w[j]+d)-fabs(w[j]) + G*d; d_old = 0; int num_linesearch; for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) { d_diff = d_old - d; cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta; appxcond = xj_sq[j]*d*d + G_loss*d + cond; if(appxcond <= 0) { x = prob_col->x[j]; while(x->index != -1) { b[x->index-1] += d_diff*x->value; x++; } break; } if(num_linesearch == 0) { loss_old = 0; loss_new = 0; x = prob_col->x[j]; while(x->index != -1) { int ind = x->index-1; if(b[ind] > 0) loss_old += C[GETI(ind)]*b[ind]*b[ind]; double b_new = b[ind] + d_diff*x->value; b[ind] = b_new; if(b_new > 0) loss_new += C[GETI(ind)]*b_new*b_new; x++; } } else { loss_new = 0; x = prob_col->x[j]; while(x->index != -1) { int ind = x->index-1; double b_new = b[ind] + d_diff*x->value; b[ind] = b_new; if(b_new > 0) loss_new += C[GETI(ind)]*b_new*b_new; x++; } } cond = cond + loss_new - loss_old; if(cond <= 0) break; else { d_old = d; d *= 0.5; delta *= 0.5; } } w[j] += d; // recompute b[] if line search takes too many steps 
if(num_linesearch >= max_num_linesearch) { info("#"); for(int i=0; ix[i]; while(x->index != -1) { b[x->index-1] -= w[i]*x->value; x++; } } } } if(iter == 0) Gmax_init = Gmax_new; iter++; if(iter % 10 == 0) info("."); if(Gmax_new <= eps*Gmax_init) { if(active_size == w_size) break; else { active_size = w_size; info("*"); Gmax_old = INF; continue; } } Gmax_old = Gmax_new; } info("\noptimization finished, #iter = %d\n", iter); if(iter >= max_iter) info("\nWARNING: reaching max number of iterations\n"); // calculate objective value double v = 0; int nnz = 0; for(j=0; jx[j]; while(x->index != -1) { x->value *= prob_col->y[x->index-1]; // restore x->value x++; } if(w[j] != 0) { v += fabs(w[j]); nnz++; } } for(j=0; j 0) v += C[GETI(j)]*b[j]*b[j]; info("Objective value = %lf\n", v); info("#nonzeros/#features = %d/%d\n", nnz, w_size); delete [] C; delete [] index; delete [] y; delete [] b; delete [] xj_sq; } // A coordinate descent algorithm for // L1-regularized logistic regression problems // // min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)), // // Given: // x, y, Cp, Cn // eps is the stopping tolerance // // solution will be put in w #undef GETI #define GETI(i) (y[i]+1) // To support weights for instances, use GETI(i) (i) static void solve_l1r_lr( const problem *prob_col, double *w, double eps, double Cp, double Cn) { int l = prob_col->l; int w_size = prob_col->n; int j, s, iter = 0; int max_iter = 1000; int active_size = w_size; int max_num_linesearch = 20; double x_min = 0; double sigma = 0.01; double d, G, H; double Gmax_old = INF; double Gmax_new; double Gmax_init = 0.0; double sum1, appxcond1; double sum2, appxcond2; double cond; int *index = new int[w_size]; schar *y = new schar[l]; double *exp_wTx = new double[l]; double *exp_wTx_new = new double[l]; double *xj_max = new double[w_size]; double *C_sum = new double[w_size]; double *xjneg_sum = new double[w_size]; double *xjpos_sum = new double[w_size]; feature_node *x; double *C = new double[l]; for(j=0; jy[j] > 0) 
{ y[j] = 1; C[j] = prob_col->W[j] * Cp; } else { y[j] = -1; C[j] = prob_col->W[j] * Cn; } } for(j=0; jx[j]; while(x->index != -1) { int ind = x->index-1; double val = x->value; x_min = min(x_min, val); xj_max[j] = max(xj_max[j], val); C_sum[j] += C[GETI(ind)]; if(y[ind] == -1) xjneg_sum[j] += C[GETI(ind)]*val; else xjpos_sum[j] += C[GETI(ind)]*val; x++; } } while(iter < max_iter) { Gmax_new = 0; for(j=0; jx[j]; while(x->index != -1) { int ind = x->index-1; double exp_wTxind = exp_wTx[ind]; double tmp1 = x->value/(1+exp_wTxind); double tmp2 = C[GETI(ind)]*tmp1; double tmp3 = tmp2*exp_wTxind; sum2 += tmp2; sum1 += tmp3; H += tmp1*tmp3; x++; } G = -sum2 + xjneg_sum[j]; double Gp = G+1; double Gn = G-1; double violation = 0; if(w[j] == 0) { if(Gp < 0) violation = -Gp; else if(Gn > 0) violation = Gn; else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) { active_size--; swap(index[s], index[active_size]); s--; continue; } } else if(w[j] > 0) violation = fabs(Gp); else violation = fabs(Gn); Gmax_new = max(Gmax_new, violation); // obtain Newton direction d if(Gp <= H*w[j]) d = -Gp/H; else if(Gn >= H*w[j]) d = -Gn/H; else d = -w[j]; if(fabs(d) < 1.0e-12) continue; d = min(max(d,-10.0),10.0); double delta = fabs(w[j]+d)-fabs(w[j]) + G*d; int num_linesearch; for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) { cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta; if(x_min >= 0) { double tmp = exp(d*xj_max[j]); appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j]; appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j]; if(min(appxcond1,appxcond2) <= 0) { x = prob_col->x[j]; while(x->index != -1) { exp_wTx[x->index-1] *= exp(d*x->value); x++; } break; } } cond += d*xjneg_sum[j]; int i = 0; x = prob_col->x[j]; while(x->index != -1) { int ind = x->index-1; double exp_dx = exp(d*x->value); exp_wTx_new[i] = exp_wTx[ind]*exp_dx; cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i])); x++; i++; } 
if(cond <= 0) { int i = 0; x = prob_col->x[j]; while(x->index != -1) { int ind = x->index-1; exp_wTx[ind] = exp_wTx_new[i]; x++; i++; } break; } else { d *= 0.5; delta *= 0.5; } } w[j] += d; // recompute exp_wTx[] if line search takes too many steps if(num_linesearch >= max_num_linesearch) { info("#"); for(int i=0; ix[i]; while(x->index != -1) { exp_wTx[x->index-1] += w[i]*x->value; x++; } } for(int i=0; i= max_iter) info("\nWARNING: reaching max number of iterations\n"); // calculate objective value double v = 0; int nnz = 0; for(j=0; jl; int n = prob->n; int nnz = 0; int *col_ptr = new int[n+1]; feature_node *x_space; prob_col->l = l; prob_col->n = n; prob_col->y = new int[l]; prob_col->x = new feature_node*[n]; prob_col->W = new double[l]; for(i=0; iy[i] = prob->y[i]; prob_col->W[i] = prob->W[i]; } for(i=0; ix[i]; while(x->index != -1) { nnz++; col_ptr[x->index]++; x++; } } for(i=1; ix[i] = &x_space[col_ptr[i]]; for(i=0; ix[i]; while(x->index != -1) { int ind = x->index-1; x_space[col_ptr[ind]].index = i+1; // starts from 1 x_space[col_ptr[ind]].value = x->value; col_ptr[ind]++; x++; } } for(i=0; il; int max_nr_class = 16; int nr_class = 0; int *label = Malloc(int,max_nr_class); int *count = Malloc(int,max_nr_class); int *data_label = Malloc(int,l); int i; for(i=0;iy[i]; int j; for(j=0;jeps; int pos = 0; int neg = 0; for(int i=0;il;i++) if(prob->y[i]==+1) pos++; neg = prob->l - pos; function *fun_obj=NULL; switch(param->solver_type) { case L2R_LR: { fun_obj=new l2r_lr_fun(prob, Cp, Cn); TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l); tron_obj.set_print_string(liblinear_print_string); tron_obj.tron(w); delete fun_obj; break; } case L2R_L2LOSS_SVC: { fun_obj=new l2r_l2_svc_fun(prob, Cp, Cn); TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l); tron_obj.set_print_string(liblinear_print_string); tron_obj.tron(w); delete fun_obj; break; } case L2R_L2LOSS_SVC_DUAL: solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL); break; case L2R_L1LOSS_SVC_DUAL: 
solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL); break; case L1R_L2LOSS_SVC: { problem prob_col; feature_node *x_space = NULL; transpose(prob, &x_space ,&prob_col); solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn); delete [] prob_col.y; delete [] prob_col.x; delete [] prob_col.W; delete [] x_space; break; } case L1R_LR: { problem prob_col; feature_node *x_space = NULL; transpose(prob, &x_space ,&prob_col); solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn); delete [] prob_col.y; delete [] prob_col.x; delete [] prob_col.W; delete [] x_space; break; } default: fprintf(stderr, "Error: unknown solver_type\n"); break; } } // // Remove zero weighed data as libsvm and some liblinear solvers require C > 0. // static void remove_zero_weight(problem *newprob, const problem *prob) { int i; int l = 0; for(i=0;il;i++) if(prob->W[i] > 0) l++; *newprob = *prob; newprob->l = l; newprob->x = Malloc(feature_node*,l); newprob->y = Malloc(int,l); newprob->W = Malloc(double,l); int j = 0; for(i=0;il;i++) if(prob->W[i] > 0) { newprob->x[j] = prob->x[i]; newprob->y[j] = prob->y[i]; newprob->W[j] = prob->W[i]; j++; } } // // Interface functions // model* train(const problem *prob, const parameter *param) { problem newprob; remove_zero_weight(&newprob, prob); prob = &newprob; int i,j; int l = prob->l; int n = prob->n; int w_size = prob->n; model *model_ = Malloc(model,1); if(prob->bias>=0) model_->nr_feature=n-1; else model_->nr_feature=n; model_->param = *param; model_->bias = prob->bias; int nr_class; int *label = NULL; int *start = NULL; int *count = NULL; int *perm = Malloc(int,l); // group training data of the same class group_classes(prob,&nr_class,&label,&start,&count,perm); model_->nr_class=nr_class; model_->label = Malloc(int,nr_class); for(i=0;ilabel[i] = label[i]; // calculate weighted C double *weighted_C = Malloc(double, nr_class); for(i=0;iC; for(i=0;inr_weight;i++) { for(j=0;jweight_label[i] == label[j]) break; if(j == nr_class) 
fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]); else weighted_C[j] *= param->weight[i]; } // constructing the subproblem feature_node **x = Malloc(feature_node *,l); double *W = Malloc(double,l); for(i=0;ix[perm[i]]; W[i] = prob->W[perm[i]]; } int k; problem sub_prob; sub_prob.l = l; sub_prob.n = n; sub_prob.x = Malloc(feature_node *,sub_prob.l); sub_prob.y = Malloc(int,sub_prob.l); sub_prob.W = Malloc(double,sub_prob.l); for(k=0; ksolver_type == MCSVM_CS) { model_->w=Malloc(double, n*nr_class); for(i=0;ieps); Solver.Solve(model_->w); } else { if(nr_class == 2) { model_->w=Malloc(double, w_size); int e0 = start[0]+count[0]; k=0; for(; kw[0], weighted_C[0], weighted_C[1]); } else { model_->w=Malloc(double, w_size*nr_class); double *w=Malloc(double, w_size); for(i=0;iC); for(int j=0;jw[j*nr_class+i] = w[j]; } free(w); } } free(x); free(W); free(label); free(start); free(count); free(perm); free(sub_prob.x); free(sub_prob.y); free(sub_prob.W); free(weighted_C); free(newprob.x); free(newprob.y); free(newprob.W); return model_; } void destroy_model(struct model *model_) { if(model_->w != NULL) free(model_->w); if(model_->label != NULL) free(model_->label); free(model_); } static const char *solver_type_table[]= { "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC","L2R_L1LOSS_SVC_DUAL","MCSVM_CS", "L1R_L2LOSS_SVC","L1R_LR", NULL }; int save_model(const char *model_file_name, const struct model *model_) { int i; int nr_feature=model_->nr_feature; int n; const parameter& param = model_->param; if(model_->bias>=0) n=nr_feature+1; else n=nr_feature; int w_size = n; FILE *fp = fopen(model_file_name,"w"); if(fp==NULL) return -1; int nr_w; if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) nr_w=1; else nr_w=model_->nr_class; fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]); fprintf(fp, "nr_class %d\n", model_->nr_class); fprintf(fp, "label"); for(i=0; inr_class; i++) fprintf(fp, " %d", 
model_->label[i]); fprintf(fp, "\n"); fprintf(fp, "nr_feature %d\n", nr_feature); fprintf(fp, "bias %.16g\n", model_->bias); fprintf(fp, "w\n"); for(i=0; iw[i*nr_w+j]); fprintf(fp, "\n"); } if (ferror(fp) != 0 || fclose(fp) != 0) return -1; else return 0; } struct model *load_model(const char *model_file_name) { FILE *fp = fopen(model_file_name,"r"); if(fp==NULL) return NULL; int i; int nr_feature; int n; int nr_class; double bias; model *model_ = Malloc(model,1); parameter& param = model_->param; model_->label = NULL; char cmd[81]; while(1) { fscanf(fp,"%80s",cmd); if(strcmp(cmd,"solver_type")==0) { fscanf(fp,"%80s",cmd); int i; for(i=0;solver_type_table[i];i++) { if(strcmp(solver_type_table[i],cmd)==0) { param.solver_type=i; break; } } if(solver_type_table[i] == NULL) { fprintf(stderr,"unknown solver type.\n"); free(model_->label); free(model_); return NULL; } } else if(strcmp(cmd,"nr_class")==0) { fscanf(fp,"%d",&nr_class); model_->nr_class=nr_class; } else if(strcmp(cmd,"nr_feature")==0) { fscanf(fp,"%d",&nr_feature); model_->nr_feature=nr_feature; } else if(strcmp(cmd,"bias")==0) { fscanf(fp,"%lf",&bias); model_->bias=bias; } else if(strcmp(cmd,"w")==0) { break; } else if(strcmp(cmd,"label")==0) { int nr_class = model_->nr_class; model_->label = Malloc(int,nr_class); for(int i=0;ilabel[i]); } else { fprintf(stderr,"unknown text in model file: [%s]\n",cmd); free(model_); return NULL; } } nr_feature=model_->nr_feature; if(model_->bias>=0) n=nr_feature+1; else n=nr_feature; int w_size = n; int nr_w; if(nr_class==2 && param.solver_type != MCSVM_CS) nr_w = 1; else nr_w = nr_class; model_->w=Malloc(double, w_size*nr_w); for(i=0; iw[i*nr_w+j]); fscanf(fp, "\n"); } if (ferror(fp) != 0 || fclose(fp) != 0) return NULL; return model_; } int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values) { int idx; int n; if(model_->bias>=0) n=model_->nr_feature+1; else n=model_->nr_feature; double *w=model_->w; int nr_class=model_->nr_class; 
int i; int nr_w; if(nr_class==2 && model_->param.solver_type != MCSVM_CS) nr_w = 1; else nr_w = nr_class; const feature_node *lx=x; for(i=0;iindex)!=-1; lx++) { // the dimension of testing data may exceed that of training if(idx<=n) for(i=0;ivalue; } if(nr_class==2) return (dec_values[0]>0)?model_->label[0]:model_->label[1]; else { int dec_max_idx = 0; for(i=1;i dec_values[dec_max_idx]) dec_max_idx = i; } return model_->label[dec_max_idx]; } } int predict(const model *model_, const feature_node *x) { double *dec_values = Malloc(double, model_->nr_class); int label=predict_values(model_, x, dec_values); free(dec_values); return label; } int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates) { if(model_->param.solver_type==L2R_LR) { int i; int nr_class=model_->nr_class; int nr_w; if(nr_class==2) nr_w = 1; else nr_w = nr_class; int label=predict_values(model_, x, prob_estimates); for(i=0;iweight_label != NULL) free(param->weight_label); if(param->weight != NULL) free(param->weight); } const char *check_parameter(const parameter *param) { if(param->eps <= 0) return "eps <= 0"; if(param->C <= 0) return "C <= 0"; if(param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC_DUAL && param->solver_type != L2R_L2LOSS_SVC && param->solver_type != L2R_L1LOSS_SVC_DUAL && param->solver_type != MCSVM_CS && param->solver_type != L1R_L2LOSS_SVC && param->solver_type != L1R_LR) return "unknown solver type"; return NULL; } void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target) { int i; int *fold_start = Malloc(int,nr_fold+1); int l = prob->l; int *perm = Malloc(int,l); for(i=0;ibias; subprob.n = prob->n; subprob.l = l-(end-begin); subprob.x = Malloc(struct feature_node*,subprob.l); subprob.y = Malloc(int,subprob.l); subprob.W = Malloc(double,subprob.l); k=0; for(j=0;jx[perm[j]]; subprob.y[k] = prob->y[perm[j]]; subprob.W[k] = prob->W[perm[j]]; ++k; } for(j=end;jx[perm[j]]; subprob.y[k] 
= prob->y[perm[j]]; subprob.W[k] = prob->W[perm[j]]; ++k; } struct model *submodel = train(&subprob,param); for(j=begin;jx[perm[j]]); destroy_model(submodel); free(subprob.x); free(subprob.y); free(subprob.W); } free(fold_start); free(perm); } int get_nr_feature(const model *model_) { return model_->nr_feature; } int get_nr_class(const model *model_) { return model_->nr_class; } void get_labels(const model *model_, int* label) { if (model_->label != NULL) for(int i=0;inr_class;i++) label[i] = model_->label[i]; } NaN/src/histo_mex.cpp0000664002356700235670000002604312516156436015252 0ustar schloeglschloegl//------------------------------------------------------------------- // C-MEX implementation of Histogram - this function is part of the NaN-toolbox. // // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, see . // // // histo_mex: computes histogram // // Input: // - data matrix // - flag for row-wise histogram // // Output: // - histogram // HIS.X // HIS.H // // $Id: histo_mex.cpp 12790 2015-04-23 11:53:02Z schloegl $ // Copyright (C) 2009,2010,2011 Alois Schloegl // This function is part of the NaN-toolbox // http://pub.ist.ac.at/~schloegl/matlab/NaN/ // //------------------------------------------------------------------- /* TODO: speed: its slower than the m-functions histo2/3/4 |-> use a more efficient sorting function resembling of histo3 for multicolumn data. 
support of complex data and char-strings */ #include #include #include #include "mex.h" #ifdef tmwtypes_h #if (MX_API_VER<=0x07020000) typedef int mwSize; #endif #endif struct sort_t { uint8_t *Table; // data table size_t Size; // sizeof elements e.g. 4 for single size_t Stride; // for multicolumn data size_t N; // number of rows mxClassID Type; // data type } Sort; //inline int compare(const sqize_t *a, const size_t *b) { int compare(const void *a, const void *b) { int z = 0; size_t i = 0; size_t ix1 = *(size_t*)a; size_t ix2 = *(size_t*)b; while ((if2) z = 1; break; } case mxUINT32_CLASS: { uint32_t f1,f2; f1 = ((uint32_t*)Sort.Table)[ix1]; f2 = ((uint32_t*)Sort.Table)[ix2]; if (f1f2) z = 1; break; } case mxINT64_CLASS: { int64_t f1,f2; f1 = ((int64_t*)Sort.Table)[ix1]; f2 = ((int64_t*)Sort.Table)[ix2]; if (f1f2) z = 1; break; } case mxUINT64_CLASS: { uint64_t f1,f2; f1 = ((uint64_t*)Sort.Table)[ix1]; f2 = ((uint64_t*)Sort.Table)[ix2]; if (f1f2) z = 1; break; } case mxSINGLE_CLASS: { float f1,f2; f1 = ((float*)Sort.Table)[ix1]; f2 = ((float*)Sort.Table)[ix2]; z = isnan(f1) - isnan(f2); if (z) break; if (f1f2) z = 1; // else f1==f2 || (isnan(f1) && isnan(f2)) break; } case mxDOUBLE_CLASS: { double f1,f2; f1 = ((double*)Sort.Table)[ix1]; f2 = ((double*)Sort.Table)[ix2]; z = isnan(f1) - isnan(f2); if (z) break; if (f1f2) z = 1; // else f1==f2 || (isnan(f1) && isnan(f2)) break; } case mxINT16_CLASS: { int16_t f1,f2; f1 = ((int16_t*)Sort.Table)[ix1]; f2 = ((int16_t*)Sort.Table)[ix2]; if (f1f2) z = 1; break; } case mxUINT16_CLASS: { uint16_t f1,f2; f1 = ((uint16_t*)Sort.Table)[ix1]; f2 = ((uint16_t*)Sort.Table)[ix2]; if (f1f2) z = 1; break; } case mxINT8_CLASS: { int8_t f1,f2; f1 = ((int8_t*)Sort.Table)[ix1]; f2 = ((int8_t*)Sort.Table)[ix2]; if (f1f2) z = 1; break; } case mxUINT8_CLASS: { uint8_t f1,f2; f1 = ((uint8_t*)Sort.Table)[ix1]; f2 = ((uint8_t*)Sort.Table)[ix2]; if (f1f2) z = 1; break; } } i++; ix1 += Sort.Stride; ix2 += Sort.Stride; } return(z); } void 
mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) { const mwSize *SZ; char flag_rows = 0; char done = 0; mwSize j, k, l; // running indices const mxArray *W = NULL; double *w = NULL; // check for proper number of input and output arguments if ((PInputCount <= 0) || (PInputCount > 3)) { mexPrintf("HISTO_MEX computes histogram from vector or column matrices\n\n"); mexPrintf("usage:\tHIS = histo_mex(Y)\n\t\tComputes histogram from each column\n"); mexPrintf("\t[HIS,tix] = histo_mex(Y,'rows')\n\t\tComputes row-wise histogram, tix is useful for data compression.\n\t\t Y = HIS.X(tix,:); \n\n"); mexPrintf("see also: HISTO2, HISTO3, HISTO4\n\n"); mexErrMsgTxt("HISTO_MEX requires 1 or 2 input arguments\n"); } if (POutputCount > 2) mexErrMsgTxt("histo.MEX has 1 output arguments."); // get 1st argument if (mxIsComplex(PInputs[0])) mexErrMsgTxt("complex argument not supported (yet). "); // TODO: support complex argument! if (PInputCount==1) ; // histo_mex(X) else if (mxIsChar(PInputs[1])) { // histo_mex(X,'rows') char *t = mxArrayToString(PInputs[1]); flag_rows = !strcmp(t,"rows"); mxFree(t); // histo_mex(X,'rows',W) if ((PInputCount>2) && mxIsDouble(PInputs[2])) W = PInputs[2]; } // histo_mex(X,W) else if (mxIsDouble(PInputs[1])) { W = PInputs[1]; } else mexErrMsgTxt("Weight vector must be REAL/DOUBLE."); if (W != NULL) { if (mxGetM(PInputs[0])==mxGetM(W) ) w = (double*)mxGetData(W); else mexErrMsgTxt("number of rows in X and W do not match."); for (k=0; (k=0.0); k++); if (k2) mexErrMsgTxt("Error HISTO.MEX: input must be vector or matrix (no more than two dimensions)"); size_t n = SZ[0]; size_t sz = 1; char flag = 0; const char *fnames[] = {"datatype","X","H"}; mxArray *HIS = mxCreateStructMatrix(1, 1, 3, fnames); mxSetField(HIS,0,"datatype",mxCreateString("HISTOGRAM")); if (flag_rows || (SZ[1]==1)) { ///***** SORT each column: initialize sorting algorithm size_t *idx = NULL; idx = (size_t*) mxMalloc(SZ[0]*sizeof(size_t)); for (n=0; 
n1) { POutput[1] = mxCreateNumericMatrix(SZ[0], 1, mxUINT64_CLASS,mxREAL); tix = (uint64_t*)mxGetData(POutput[1]); } // fill HIS.H and HIS.X mxArray *H = mxCreateNumericMatrix(n, 1, mxDOUBLE_CLASS,mxREAL); mxArray *X = mxCreateNumericMatrix(n, SZ[1], mxGetClassID(PInputs[0]),mxREAL); mxSetField(HIS,0,"H",H); mxSetField(HIS,0,"X",X); double *h = (double*)mxGetData(H); uint8_t *x = (uint8_t*)mxGetData(X); l = 0; if (tix) tix[idx[0]] = 1; for (k=0; k This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ This code was extracted from liblinear-1.51 in Jan 2010 and modified for the use with Octave This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see . 
*/ #ifndef _TRON_H #define _TRON_H class function { public: virtual double fun(double *w) = 0 ; virtual void grad(double *w, double *g) = 0 ; virtual void Hv(double *s, double *Hs) = 0 ; virtual int get_nr_variable(void) = 0 ; virtual ~function(void){} }; class TRON { public: TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000); ~TRON(); void tron(double *w); void set_print_string(void (*i_print) (const char *buf)); private: int trcg(double delta, double *g, double *s, double *r); double norm_inf(int n, double *x); double eps; int max_iter; function *fun_obj; void info(const char *fmt,...); void (*tron_print_string)(const char *buf); }; #endif NaN/src/svm.h0000664002356700235670000001142512512501472013511 0ustar schloeglschloegl/* This code was extracted from libsvm-3.12 in Apr 2015 and modified for the use with Octave Copyright (c) 2010,2011,2015 Alois Schloegl This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ Copyright (c) 2000-2012 Chih-Chung Chang and Chih-Jen Lin All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither name of copyright holders nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LIBSVM_H #define _LIBSVM_H #define LIBSVM_VERSION 312 #ifdef __cplusplus extern "C" { #endif extern int libsvm_version; struct svm_node { int index; double value; }; struct svm_problem { int l; double *y; struct svm_node **x; }; enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR }; /* svm_type */ enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */ struct svm_parameter { int svm_type; int kernel_type; int degree; /* for poly */ double gamma; /* for poly/rbf/sigmoid */ double coef0; /* for poly/sigmoid */ /* these are for training only */ double cache_size; /* in MB */ double eps; /* stopping criteria */ double C; /* for C_SVC, EPSILON_SVR and NU_SVR */ int nr_weight; /* for C_SVC */ int *weight_label; /* for C_SVC */ double* weight; /* for C_SVC */ double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */ double p; /* for EPSILON_SVR */ int shrinking; /* use the shrinking heuristics */ int probability; /* do probability estimates */ }; // // svm_model // struct svm_model { struct svm_parameter param; /* parameter */ int nr_class; /* number of classes, = 2 in regression/one class svm */ int l; /* total #SV */ struct svm_node **SV; /* SVs (SV[l]) */ double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */ double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */ double *probA; /* pariwise probability information */ double *probB; /* for classification only */ int *label; /* label of each class (label[k]) */ int 
*nSV; /* number of SVs for each class (nSV[k]) */ /* nSV[0] + nSV[1] + ... + nSV[k-1] = l */ /* XXX */ int free_sv; /* 1 if svm_model is created by svm_load_model */ /* 0 if svm_model is created by svm_train */ }; struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param); void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target); int svm_save_model(const char *model_file_name, const struct svm_model *model); struct svm_model *svm_load_model(const char *model_file_name); int svm_get_svm_type(const struct svm_model *model); int svm_get_nr_class(const struct svm_model *model); void svm_get_labels(const struct svm_model *model, int *label); double svm_get_svr_probability(const struct svm_model *model); double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values); double svm_predict(const struct svm_model *model, const struct svm_node *x); double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates); void svm_free_model_content(struct svm_model *model_ptr); void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr); void svm_destroy_param(struct svm_parameter *param); const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param); int svm_check_probability_model(const struct svm_model *model); void svm_set_print_string_function(void (*print_func)(const char *)); #ifdef __cplusplus } #endif #endif /* _LIBSVM_H */ NaN/src/sumskipnan_mex.cpp0000664002356700235670000006054512540557475016326 0ustar schloeglschloegl //------------------------------------------------------------------- // C-MEX implementation of SUMSKIPNAN - this function is part of the NaN-toolbox. 
// // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, see . // // // sumskipnan: sums all non-NaN values // usage: // [o,count,SSQ] = sumskipnan_mex(x,DIM,flag,W); // // SUMSKIPNAN uses two techniques to reduce errors: // 1) long double (80bit) instead of 64-bit double is used internally // 2) The Kahan Summation formula is used to reduce the error margin from N*eps to 2*eps // The latter is only implemented in case of stride=1 (column vectors only, summation along 1st dimension). 
// // Input: // - x data array // - DIM (optional) dimension to sum // - flag (optional) is actually an output argument telling whether some NaN was observed // - W (optional) weight vector to compute weighted sum (default 1) // // Output: // - o (weighted) sum along dimension DIM // - count of valid elements // - sums of squares // // // $Id: sumskipnan_mex.cpp 12826 2015-06-18 15:09:49Z schloegl $ // Copyright (C) 2009,2010,2011 Alois Schloegl // This function is part of the NaN-toolbox // http://pub.ist.ac.at/~schloegl/matlab/NaN/ // //------------------------------------------------------------------- #include #include #include "mex.h" inline void __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); inline void __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); inline void __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); inline void __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); inline void __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); inline void __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); inline void __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); inline void __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); //#define NO_FLAG #ifdef tmwtypes_h #if (MX_API_VER<=0x07020000) typedef int mwSize; #endif #endif void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) { const mwSize *SZ; double* LInput; double* LOutputSum; double* LOutputCount; double* LOutputSum2; long double* LongOutputSum = NULL; long double* LongOutputCount = NULL; long double* LongOutputSum2 = NULL; double x; 
double* W = NULL; // weight vector mwSize DIM = 0; mwSize D1, D2, D3; // NN; // mwSize ND, ND2; // number of dimensions: input, output mwSize ix0, ix1, ix2; // index to input and output mwSize j, l; // running indices mwSize *SZ2; // size of output char flag_isNaN = 0; // check for proper number of input and output arguments if ((PInputCount <= 0) || (PInputCount > 4)) mexErrMsgTxt("SUMSKIPNAN.MEX requires between 1 and 4 arguments."); if (POutputCount > 4) mexErrMsgTxt("SUMSKIPNAN.MEX has 1 to 3 output arguments."); // get 1st argument if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0]) && !mxIsSparse(PInputs[0]) ) LInput = mxGetPr(PInputs[0]); else mexErrMsgTxt("First argument must be and not sparse REAL/DOUBLE."); // get 2nd argument if (PInputCount > 1) { switch (mxGetNumberOfElements(PInputs[1])) { case 0: x = 0.0; // accept empty element break; case 1: x = (mxIsNumeric(PInputs[1]) ? mxGetScalar(PInputs[1]) : -1.0); break; default:x = -1.0; // invalid } if ((x < 0) || (x > 65535) || (x != floor(x))) mexErrMsgTxt("Error SUMSKIPNAN.MEX: DIM-argument must be a positive integer scalar"); DIM = (unsigned)floor(x); } // get size ND = mxGetNumberOfDimensions(PInputs[0]); // NN = mxGetNumberOfElements(PInputs[0]); SZ = mxGetDimensions(PInputs[0]); // if DIM==0 (undefined), look for first dimension with more than 1 element. for (j = 0; (DIM < 1) && (j < ND); j++) if (SZ[j]>1) DIM = j+1; if (DIM < 1) DIM=1; // in case DIM is still undefined ND2 = (ND>DIM ? 
ND : DIM); // number of dimensions of output SZ2 = (mwSize*)mxCalloc(ND2, sizeof(mwSize)); // allocate memory for output size for (j=0; j ND, add extra elements 1 SZ2[j] = 1; for (j=0, D1=1; j 3) { if (!mxGetNumberOfElements(PInputs[3])) ; // empty weight vector - no weighting else if (mxGetNumberOfElements(PInputs[3])==D2) W = mxGetPr(PInputs[3]); else mexErrMsgTxt("Error SUMSKIPNAN.MEX: length of weight vector does not match size of dimension"); } int ACC_LEVEL = 0; { mxArray *LEVEL = NULL; int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); if (!s) { ACC_LEVEL = (int) mxGetScalar(LEVEL); if ((D1>1) && (ACC_LEVEL>2)) mexWarnMsgTxt("Warning: Kahan summation not supported with stride > 1 !"); } mxDestroyArray(LEVEL); } // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); // create outputs #define TYP mxDOUBLE_CLASS POutput[0] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); LOutputSum = mxGetPr(POutput[0]); if (D1!=1 && D2>0) LongOutputSum = (long double*) mxCalloc(D1*D3,sizeof(long double)); if (POutputCount >= 2) { POutput[1] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); LOutputCount = mxGetPr(POutput[1]); if (D1!=1 && D2>0) LongOutputCount = (long double*) mxCalloc(D1*D3,sizeof(long double)); } if (POutputCount >= 3) { POutput[2] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); LOutputSum2 = mxGetPr(POutput[2]); if (D1!=1 && D2>0) LongOutputSum2 = (long double*) mxCalloc(D1*D3,sizeof(long double)); } mxFree(SZ2); if (!D1 || !D2 || !D3) // zero size array ; // do nothing else if (D1==1) { if (ACC_LEVEL<1) { // double accuray, naive summation, error = N*2^-52 switch (POutputCount) { case 0: case 1: #pragma omp parallel for schedule(dynamic) for (l = 0; l DIM for (l = 0; l DIM for (l = 0; l DIM for (l = 0; l 2) && mxGetNumberOfElements(PInputs[2])) { // set FLAG_NANS_OCCURED switch (mxGetClassID(PInputs[2])) { case mxLOGICAL_CLASS: case mxCHAR_CLASS: case mxINT8_CLASS: case mxUINT8_CLASS: *(uint8_t*)mxGetData(PInputs[2]) = 1; break; case mxDOUBLE_CLASS: 
*(double*)mxGetData(PInputs[2]) = 1.0; break; case mxSINGLE_CLASS: *(float*)mxGetData(PInputs[2]) = 1.0; break; case mxINT16_CLASS: case mxUINT16_CLASS: *(uint16_t*)mxGetData(PInputs[2]) = 1; break; case mxINT32_CLASS: case mxUINT32_CLASS: *(uint32_t*)mxGetData(PInputs[2])= 1; break; case mxINT64_CLASS: case mxUINT64_CLASS: *(uint64_t*)mxGetData(PInputs[2]) = 1; break; case mxFUNCTION_CLASS: case mxUNKNOWN_CLASS: case mxCELL_CLASS: case mxSTRUCT_CLASS: default: mexPrintf("Type of 3rd input argument not supported."); } } #endif } #define stride 1 inline void __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) { long double sum=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector long double count = 0.0; do { long double x = *data; if (!isnan(x)) { count += *W; sum += *W*x; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = count; } else { // w/o weight vector size_t countI = 0; do { long double x = *data; if (!isnan(x)) { countI++; sum += x; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (double)countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = sum; } inline void __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) { long double sum=0; long double msq=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector long double count = 0.0; do { long double x = *data; if (!isnan(x)) { count += *W; long double t = *W*x; sum += t; msq += x*t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = count; } else { // w/o weight vector size_t countI = 0; do { long double x = *data; if (!isnan(x)) { countI++; sum += x; msq += x*x; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = 
(double)countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = sum; *s2 = msq; } inline void __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) { double sum=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector double count = 0.0; do { double x = *data; if (!isnan(x)) { count += *W; sum += *W*x; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = count; } else { // w/o weight vector size_t countI = 0; do { double x = *data; if (!isnan(x)) { countI++; sum += x; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (double)countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = sum; } inline void __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) { double sum=0; double msq=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector double count = 0.0; do { double x = *data; if (!isnan(x)) { count += *W; double t = *W*x; sum += t; msq += x*t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = count; } else { // w/o weight vector size_t countI = 0; do { double x = *data; if (!isnan(x)) { countI++; sum += x; msq += x*x; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (double)countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = sum; *s2 = msq; } /*************************************** using Kahan's summation formula [1] this gives more accurate results while the computational effort within the loop is about 4x as high First tests show a penalty of about 40% in terms of computational time. 
[1] David Goldberg, What Every Computer Scientist Should Know About Floating-Point Arithmetic ACM Computing Surveys, Vol 23, No 1, March 1991. ****************************************/ inline void __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) { long double sum=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector long double count = 0.0; long double rc=0.0, rn=0.0; do { long double x = *data; long double t,y; if (!isnan(x)) { //count += *W; [1] y = *W-rn; t = count+y; rn= (t-count)-y; count= t; //sum += *W*x; [1] y = *W*x-rc; t = sum+y; rc= (t-sum)-y; sum= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = count; } else { // w/o weight vector size_t countI = 0; long double rc=0.0; do { long double x = *data; long double t,y; if (!isnan(x)) { countI++; // sum += x; [1] y = x-rc; t = sum+y; rc= (t-sum)-y; sum= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (double)countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = sum; } inline void __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) { long double sum=0; long double msq=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector long double count = 0.0; long double rc=0.0, rn=0.0, rq=0.0; do { long double x = *data; long double t,y; if (!isnan(x)) { //count += *W; [1] y = *W-rn; t = count+y; rn= (t-count)-y; count= t; long double w = *W*x; //sum += *W*x; [1] y = *W*x-rc; t = sum+y; rc= (t-sum)-y; sum= t; // msq += x*w; y = w*x-rq; t = msq+y; rq= (t-msq)-y; msq= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = count; } else { // w/o weight vector size_t countI = 0; long double rc=0.0, rq=0.0; do { long double x = *data; long double 
t,y; if (!isnan(x)) { countI++; //sum += x; [1] y = x-rc; t = sum+y; rc= (t-sum)-y; sum= t; // msq += x*x; y = x*x-rq; t = msq+y; rq= (t-msq)-y; msq= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (double)countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = sum; *s2 = msq; } inline void __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) { double sum=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector double count = 0.0; double rc=0.0, rn=0.0; do { double x = *data; double t,y; if (!isnan(x)) { //count += *W; [1] y = *W-rn; t = count+y; rn= (t-count)-y; count= t; //sum += *W*x; [1] y = *W*x-rc; t = sum+y; rc= (t-sum)-y; sum= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = count; } else { // w/o weight vector size_t countI = 0; double rc=0.0; do { double x = *data; double t,y; if (!isnan(x)) { countI++; // sum += x; [1] y = x-rc; t = sum+y; rc= (t-sum)-y; sum= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (double)countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = sum; } inline void __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) { double sum=0; double msq=0; char flag=0; // LOOP along dimension DIM double *end = data + stride*Ni; if (W) { // with weight vector double count = 0.0; double rc=0.0, rn=0.0, rq=0.0; do { double x = *data; double t,y; if (!isnan(x)) { //count += *W; [1] y = *W-rn; t = count+y; rn= (t-count)-y; count= t; double w = *W*x; //sum += *W*x; [1] y = *W*x-rc; t = sum+y; rc= (t-sum)-y; sum= t; // msq += x*w; y = w*x-rq; t = msq+y; rq= (t-msq)-y; msq= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 W++; } while (data < end); *No = count; } else { // w/o 
weight vector size_t countI = 0; double rc=0.0, rq=0.0; do { double x = *data; double t,y; if (!isnan(x)) { countI++; //sum += x; [1] y = x-rc; t = sum+y; rc= (t-sum)-y; sum= t; // msq += x*x; y = x*x-rq; t = msq+y; rq= (t-msq)-y; msq= t; } #ifndef NO_FLAG else flag = 1; #endif data++; // stride=1 } while (data < end); *No = (double)countI; } #ifndef NO_FLAG if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; #endif *s = sum; *s2 = msq; } NaN/VERSION0000664002356700235670000000013712546555136013027 0ustar schloeglschloegl# NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN # Version: 2.8.1 # Date: 2015-07-06 NaN/NEWS0000664002356700235670000000231512546555065012457 0ustar schloeglschloegl2015-07-06: Release of NaN-tb v2.8.1 - fix nantest to avoid crashing octave 4.0.0 on windows 2015-06-24: Release of NaN-tb v2.8.0 - fix Makefile for Octave4 on windows (fixes #45363 and #44859) - check for sparse input matrices and convert to full when needed 2015-06-01: Release of NaN v.2.7.6 - improve accuracy of normcdf (bug #38170) 2015-04-23: Release of NaN v2.7.5 - fix compiler issue with __isnan - do not display diagnostic messages 2015-04-12 - upgrade to libsvm-3.12 - fix multi-threaded build (make -j) - improve some tests 2015-03-31: Release of NaN 2.7.2 - bug fix in nanstd.m - cross-compiler issues in histo_mex - fix Makefile for use with Matlab - address compatibility issue on debian/jessie - minor issues (some compiler warnings are addressed) 2015-01-24: Release of NaN 2.7.1 - support for MacOSX added (tested on Homebrew Octave) 2015-01-17: Release of NaN 2.7.0 Fix compatibility issues with core functions in Matlab and Octave - zscore: order of input output argument - mahal gives now same result than matlab - sumskipnan allows DIM argument larger than ndims - a few minor issues For changes of previous releases see: http://pub.ist.ac.at/~schloegl/matlab/NaN/CHANGELOG NaN/DESCRIPTION0000664002356700235670000000055212546555136013466 0ustar schloeglschloeglName: NaN 
Version: 2.8.1 Date: 2015-07-06 Author: Alois Schloegl Maintainer: Alois Schloegl Title: The NaN-toolbox Description: A statistics and machine learning toolbox for data with and w/o missing values Depends: octave (> 3.2.0) License: GPLv3+ Url: http://pub.ist.ac.at/~schloegl/matlab/NaN Autoload: no SVNRelease: $Rev: 12813 $ NaN/test/0000775002356700235670000000000012546555135012734 5ustar schloeglschloeglNaN/test/test_perf_skipnan.m0000664002356700235670000000342711601145313016616 0ustar schloeglschloegl
%% TEST_PERF_NANTB
% Times repeated calls of SUMSKIPNAN_MEX and COVM_MEX on a large random
% matrix and displays CPU time (column 1) and wall-clock time (column 2).

%    $Id$
%    Copyright (C) 2009,2010 by Alois Schloegl
%    This function is part of the NaN-toolbox
%    http://pub.ist.ac.at/~schloegl/matlab/NaN/

%    This program is free software; you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation; either version 3 of the License, or
%    (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program; if not, write to the Free Software
%    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301
%    USA


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Performance
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% SUMSKIPNAN_MEX with and w/o OpenMP
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Generate Test data
y = randn(1e7,32);

flag=0;
N = 10;	% repeat the tests 10 times
% NOTE(review): repmat(N,2) yields a 2-by-2 matrix filled with N; the arrays
% grow to N rows inside the loop and every row is overwritten. Presumably an
% N-by-2 preallocation was intended - confirm.
t1=repmat(N,2);
t2=repmat(N,2);
for k=1:N;
	% column 1: CPU time, column 2: elapsed (wall-clock) time
	tic;t=cputime();
	[s,n]=sumskipnan_mex(y,1);
	t1(k,1)=cputime()-t;
	t1(k,2)=toc;

	tic;t=cputime();
	[c,n]=covm_mex(y,[],flag);
	t2(k,1)=cputime()-t;
	t2(k,2)=toc;
end;

% one row per repetition, followed by the column means and standard deviations
[[t1,t2];mean([t1,t2]);std([t1,t2])]

% ratio of mean CPU time to mean wall-clock time, for each of the two functions
exp(-diff(log([mean(t1)',mean(t2)'])))

NaN/test/test_train_sc.m0000664002356700235670000000451511666744477015773 0ustar schloeglschloegl
% Test train_sc and test_sc, weighted samples
% Trains each listed classifier on (a) the first 60% of the samples,
% (b) all samples and (c) the first 60% with sample weights W3, evaluates
% each model on all samples and collects accuracy and kappa.

%	$Id$
%	Copyright (C) 2010 by Alois Schloegl
%       This function is part of the NaN-toolbox
%       http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 3
% of the  License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.

clear
classifier= {'REG','REG2','MDA','MD2','QDA','QDA2','LD2','LD3','LD4','LD5','LD6','NBC','aNBC','WienerHopf','PLA', 'LMS','LDA/DELETION','MDA/DELETION','NBC/DELETION','RDA/DELETION','RDA','GDBC','SVM','RBF'};% 'LDA/GSVD','MDA/GSVD', 'LDA/GSVD','MDA/GSVD', 'LDA/sparse','MDA/sparse',

N=1e2;
c=[1:N]'*2>N;	% logical class indicator: true for the second half of the samples

% sample weights for the 0.6*N training samples: N/2 weights of 1/5 followed by N/10 weights of 1
W3 = [ones(1,N/2)/5,ones(1,N/10)];

for l=1:length(classifier),
	fprintf(1,'%s\n',classifier{l});
	for k=1:10,
		x=randn(N,2);
		x=x+[c,c];	% shift both features of the second class by 1

		ix = 1:0.6*N;	% training subset: first 60% of the samples
		% NOTE(review): try without catch - a failing classifier is skipped silently
		try,
			CC = train_sc(x(ix,:),c(ix)+1,classifier{l});
			R1 = test_sc(CC,x,[],c+1);

			CC = train_sc(x,c+1,classifier{l});
			R2 = test_sc(CC,x,[],c+1);

			CC = train_sc(x(ix,:),c(ix)+1,classifier{l},W3);
			R3 = test_sc(CC,x,[],c+1);

			acc1(k,l)=[R1.ACC];
			kap1(k,l)=[R1.kappa];
			acc2(k,l)=[R2.ACC];
			kap2(k,l)=[R2.kappa];
			acc3(k,l)=[R3.ACC];
			kap3(k,l)=[R3.kappa];
		end;
	end;
end;

[se,m]=sem(acc1);m
[se,m]=sem(acc2);m
[se,m]=sem(acc3);m
%[diff(m),diff(m)/sqrt(sum(se.^2))]
%[se,m]=sem(kap);[diff(m),diff(m)/sqrt(sum(se.^2))]

% These are tests to compare various classifiers

% NOTE(review): everything after this return is never executed
return

N=1e2;
c=[1:N]'*2>N;

for k=1:1000,k
	x=randn(N,2);
	x=x+[c,c];

	ix = 1:0.6*N;
	[R1,CC]=xval(x(ix,:),c(ix)+1,'REG');
	[R2,CC]=xval(x,c+1,'REG');
	[R3,CC]=xval(x(ix,:),c(ix)+1,'LDA');
	[R4,CC]=xval(x,c+1,'LDA');
	acc(k,1:4)=[R1.ACC,R2.ACC,R3.ACC,R4.ACC];
	kap(k,1:4)=[R1.kappa,R2.kappa,R3.kappa,R4.kappa];
end;

[se,m]=sem(acc),%[diff(m),diff(m)/sqrt(sum(se.^2))]
%[se,m]=sem(kap);[diff(m),diff(m)/sqrt(sum(se.^2))]

NaN/test/test_str2array.csv0000775002356700235670000000041212323251537016431 0ustar schloeglschloegl"remarks";"id";"type";"first";"last";"excluding";"list";"timestamp" ;"JK130515a";1;8;9;;"[8:9]"; ;"JK130612b";1;3;4;;"[3:4]"; ;"JK130925b";1;3;4;;"[3:4]"; ;"JK131004b";1;3;4;;"[3:4]"; ;"JK131010a";1;9;10;;"[9:10]"; ;;;;;;; ;;;;;;; "cell type: 1=CA1";;;;;;;
NaN/test/test_mex_accuracy.m0000664002356700235670000000576211601145313016606 0ustar schloeglschloegl
% TEST_MEX_ACCURACY evaluates the accuracy and speed of
%    different accuracy levels in SUMSKIPNAN_MEX and COVM_MEX
%
% The script runs COVM_MEX and SUMSKIPNAN_MEX once per accuracy level
% (0..3), records CPU and wall-clock time, and prints the summed squared
% differences between the results of every pair of levels.
%
% see also: FLAG_ACCURACY_LEVEL, SUMSKIPNAN_MEX, COVM_MEX
%
% Reference:
% [1] David Goldberg,
%       What Every Computer Scientist Should Know About Floating-Point Arithmetic
%       ACM Computing Surveys, Vol 23, No 1, March 1991.

%    $Id$
%    Copyright (C) 2009,2010 by Alois Schloegl
%    This function is part of the NaN-toolbox
%    http://pub.ist.ac.at/~schloegl/matlab/NaN/

%    This program is free software; you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation; either version 3 of the License, or
%    (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program; if not, write to the Free Software
%    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

clear
flag=0;
N = 1e7;
x=randn(N,10)+1e6;	% large offset relative to the unit variance

level = flag_accuracy_level;	%% backup original level

% COVM_MEX at accuracy levels 0..3; each t* holds [CPU time, elapsed time]
flag_accuracy_level(0);
tic,t=cputime();[cc0,nn0]=covm_mex(x,[],flag);t0=[cputime-t,toc];
flag_accuracy_level(1);
tic,t=cputime();[cc1,nn1]=covm_mex(x,[],flag);t1=[cputime-t,toc];
flag_accuracy_level(2);
tic,t=cputime();[cc2,nn2]=covm_mex(x,[],flag);t2=[cputime-t,toc];
flag_accuracy_level(3);
tic,t=cputime();[cc3,nn3]=covm_mex(x,[],flag);t3=[cputime-t,toc];
% built-in matrix product for comparison
tic,t=cputime();cc4=x'*x;nn4=size(x,1);t4=[cputime-t,toc];

% SUMSKIPNAN_MEX at accuracy levels 0..3
flag_accuracy_level(0);
tic,t=cputime();[c0,n0]=sumskipnan_mex(x,1,flag);t0s=[cputime-t,toc];
flag_accuracy_level(1);
tic,t=cputime();[c1,n1]=sumskipnan_mex(x,1,flag);t1s=[cputime-t,toc];
flag_accuracy_level(2);
tic,t=cputime();[c2,n2]=sumskipnan_mex(x,1,flag);t2s=[cputime-t,toc];
flag_accuracy_level(3);
tic,t=cputime();[c3,n3]=sumskipnan_mex(x,1,flag);t3s=[cputime-t,toc];
% built-in sum for comparison
tic,t=cputime();c4=sum(x,1);n4=size(x,1);t4s=[cputime-t,toc];

flag_accuracy_level(level);	%% restore original level

cc = {cc0,cc1,cc2,cc3};
c = {c0,c1,c2,c3};
tt = [t0;t1;t2;t3;t4];
t = [t0s;t1s;t2s;t3s;t4s];

fprintf('Sum squared differences between accuracy levels:\n');
fprintf('Level:\t|(0) naive-dou\t|(1) naive-ext\t|(2) kahan-dou \t| (3) kahan-ext\n')
fprintf('error:\t|N*2^-52\t|N*2^-64\t| 2^-52 \t| 2^-64\n')
% NOTE(review): tt(:,1) has five entries (levels 0..3 plus the built-in
% reference) but the format has only four conversions; presumably the format
% is re-applied for the fifth value - confirm the printed output. The same
% applies to t(:,1) below.
fprintf('COVM_MEX:\ntime:\t|%f\t|%f\t| %f \t| %f',tt(:,1))
for K1=1:4,
	fprintf('\n(%i)\t',K1-1);
	for K2=1:4,
		EE(K1,K2)=sum(sum((cc{K1}-cc{K2}).^2));
		E(K1,K2) =sum(sum((c{K1}-c{K2}).^2));
		fprintf('|%8g\t',EE(K1,K2)/nn1(1));
	end;
end;
fprintf('\nSUMSKIPNAN_MEX:\n')
fprintf('time:\t|%f\t|%f\t| %f \t| %f',t(:,1))
for K1=1:4,
	fprintf('\n(%i)\t',K1-1);
	for K2=1:4,
		fprintf('|%8g\t',E(K1,K2)/n1(1));
	end;
end;
fprintf('\n');
NaN/test/test_fss.m0000664002356700235670000000516612507511345014744 0ustar schloeglschloegl
% TEST_FSS test of fss.m
% Loads ue6.mat (downloading it if it is missing), ranks features with FSS,
% cross-validates with XVAL and prints/plots the resulting performance.

%	$Id$
%	Copyright (C) 2009,2010 by Alois Schloegl
%       This function is part of the NaN-toolbox
%       http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 3
% of the  License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.

if 1,
	clear
	if ~exist('ue6.mat','file')
		if strncmp(computer,'PCWIN',5)
			% no automatic download on Windows: ask the user to fetch the file
			fprintf(1,'Download http://pub.ist.ac.at/~schloegl//LV/SMBS/UE6/ue6.mat and save in local directory %s\nPress any key to continue ...\n',pwd);
			pause;
		else
			unix('wget http://pub.ist.ac.at/~schloegl//LV/SMBS/UE6/ue6.mat');
		end;
	end
	load ue6;

	N = 10;	% select N highest ranked features
	[ix,score] = fss(data, C, N);
end;

classifier= {'REG','MDA','MD2','QDA','QDA2','LD2','LD3','LD4','LD5','LD6','NBC','aNBC','WienerHopf','LDA/GSVD','MDA/GSVD', 'LDA/sparse','MDA/sparse', 'PLA', 'LMS','LDA/DELETION','MDA/DELETION','NBC/DELETION','RDA/DELETION','REG/DELETION','RDA','GDBC','SVM','PSVM','SVM11','SVM:LIN4','SVM:LIN0','SVM:LIN1','SVM:LIN2','SVM:LIN3'};%,'RBF'

%% compute cross-validated result;
% R1{k}: default classifier of XVAL using the k highest ranked features
for k=1:N
	[R1{k},CC1{k}]=xval(data(:,ix(1:k)),C);
end;
% R2{k}: k-th classifier using the 5 highest ranked features
for k=1:length(classifier),
	fprintf(1,'%i:\t%s\n',k,classifier{k});
	[R2{k},CC2{k}]=xval(data(:,ix(1:5)),C,classifier{k});
end;

fprintf(1,'#\tFeature\tN\tACC [%%]\tKappa+-se\t I [bit]\n');
R=R1;
for k=1:length(R);
	n(k)=sum(R{k}.data(:));
	ACC(k)=R{k}.ACC;
	KAP(k)=R{k}.kappa;
	KAP_Se(k)=R{k}.kappa_se;
	MI(k)=R{k}.MI;
	fprintf(1,'%3i:\t%4i\t%i\t%5.2f\t%5.2f+-%5.2f\t%4.2f\n',k,ix(k),n(k),ACC(k),KAP(k),KAP_Se(k),MI(k));
end
R=R2;
for k=1:length(R);
	n(k)=sum(R{k}.data(:));
	ACC(k)=R{k}.ACC;
	KAP(k)=R{k}.kappa;
	KAP_Se(k)=R{k}.kappa_se;
	MI(k)=R{k}.MI;
	fprintf(1,'%3i:\t%8s\t%i\t%5.2f\t%5.2f+-%5.2f\t%4.2f\n',k,classifier{k},n(k),ACC(k),KAP(k),KAP_Se(k),MI(k));
end

%% display
plot(ACC*100,'x');
set(gca,'YLim',[0,100])
ylabel('Accuracy [%]')
title('selection of N out of 2540 features')

NaN/test/test_xptopen.m0000775002356700235670000000412212507564326015647 0ustar schloeglschloegl
% TEST_XPTOPEN tests XPTOPEN
% Writes a small structure to test.xpt and reads it back, then reads the
% data sets 'buy', 'humid' and 'prdsale', re-writes each as .xpt and exports
% each as a semicolon-separated .csv file.

%	$Id: test_xptopen.m 12769 2015-04-03 19:24:06Z schloegl $
%	Copyright (C) 2010 by Alois Schloegl
%       This function is part of the NaN-toolbox
%       http://biosig-consulting.com/matlab/NaN/

% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 3
% of the  License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.

%x.c = [-1000,-2,-1,0,1,2,NaN,10,100,1000,10000,1e6,1e7,1e8]';
%y.Y = [1,2,NaN,1]'+10;
if 1,
	X.a = [-2,-0,NaN,10,444,-pi]';%,100,1000,10000,1e6,1e7,1e8]';
	X.d = [1,2,NaN,1,Inf,-Inf]'+10;
	X.b = {'a','B',' ','*','Z','zzz'}';
	fn  = 'test.xpt';
	Y = xptopen(fn,'w',X)
	Z = xptopen(fn,'r')
end;

fn = {'buy','humid','prdsale'};
for k1 = 1:length(fn);
	X = xptopen(fn{k1},'r');
	xptopen([fn{k1},'.xpt'],'w',X);
	f = fieldnames(X);
	fid = fopen([fn{k1},'.csv'],'w');
	% header row: field names separated by ';'
	% NOTE(review): the inner loops reuse the outer loop variable k1; fn{k1}
	% is only used before they start - consider a distinct index name.
	for k1=1:length(f)
		if k1>1, fprintf(fid,';'); end;
		fprintf(fid,'%s',f{k1});
	end;
	fprintf(fid,'\n');
	% one row per record
	for k2=1:length(X.(f{1}));
		for k1=1:length(f)
			if k1>1, fprintf(fid,';'); end;
			v = X.(f{k1})(k2);
			if isnumeric(v)
				% DATE and MONTH fields are written as date strings counted from 1960-01-01
				if strcmp(f{k1},'DATE'),
					fprintf(fid,'%s',datestr(v + datenum([1960,1,1]),1));
				elseif strcmp(f{k1},'MONTH'),
					fprintf(fid,'%s',datestr(v + datenum([1960,1,1]),3));
				elseif v==ceil(v),
					fprintf(fid,'%i',v);
				else
					fprintf(fid,'%f',v);
				end
			elseif iscell(v) && ischar(v{1})
				fprintf(fid,'%s',v{1});
			else
				fprintf(fid,'--');
			end;
		end;
		fprintf(fid,'\n');
	end;
	fclose(fid);
end;

NaN/test/test_classify.m0000664002356700235670000000324412507511345015761 0ustar schloeglschloegl
% TEST_CLASSIFY tests and compares NaN/CLASSIFY.M with the matlab version of CLASSIFY
% CLASSIFY is called once with the directory of train_sc.m removed from the
% path (err(1,:)) and once with it restored (err(2,:)).

%	$Id$
%	Copyright (C) 2009,2010 by Alois Schloegl
%       This function is part of the NaN-toolbox
%       http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 3
% of the  License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.

clear
load_fisheriris
% use samples 51..end and the first two measurements only
SL = meas(51:end,1);
SW = meas(51:end,2);
group = species(51:end);
h1 = gscatter(SL,SW,group,'rb','v^',[],'off');
set(h1,'LineWidth',2)
legend('Fisher versicolor','Fisher virginica','Location','northwest')

% grid of query points to be classified
[X,Y] = meshgrid(linspace(4.5,8),linspace(2,4));
X = X(:); Y = Y(:);

classifiers={'linear','quadratic','diagLinear','diagQuadratic','mahalanobis'};

% temporarily remove the directory containing train_sc.m from the search path
p = which('train_sc.m');
p = fileparts(p);
rmpath(p);
for k=1:length(classifiers)
	[C1,err(1,k),P1,logp1,coeff1] = classify([X Y],[SL SW],group,classifiers{k});
end;

% restore the path and repeat
addpath(p);
for k=1:length(classifiers)
	[C2,err(2,k),P2,logp2,coeff2] = classify([X Y],[SL SW],group,classifiers{k});
end;

err,

NaN/test/test_str2array.m0000664002356700235670000000031512323251537016071 0ustar schloeglschloegl
% Reads test_str2array.csv as one character row and parses it with STR2ARRAY.
fid = fopen('test_str2array.csv','r');	%% corrected directory
if fid<0, return; end;
s = fread(fid,[1,inf],'uint8=>char');
fclose(fid);

s(s==10)=[];	% remove all line-feed characters (char 10)
% column delimiters: ';' and tab (char 9); row delimiters: char 10 and char 13
[n,v,c]=str2array(s,[';',char(9)],char([10,13]))

NaN/test/test_xval.m0000664002356700235670000000410112507564326015116 0ustar schloeglschloegl
% test_classifier;
% Runs XVAL with every listed classifier on random two-class data that
% contains NaNs and reports sample count, accuracy and kappa, or 'failed'.

%	$Id$
%	Copyright (C) 2010 by Alois Schloegl
%       This function is part of the NaN-toolbox
%       http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 3
% of the  License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.

if 1,
	clear
	N=100;	% number of samples
	M=10;	% number of features
	classifier= {'SVM:LIB','REG','MDA','MD2','QDA','QDA2','LD2','LD3','LD4','LD5','LD6','NBC','aNBC','WienerHopf','LDA/GSVD','MDA/GSVD', 'LDA/sparse','MDA/sparse', 'PLA', 'LMS','LDA/DELETION','MDA/DELETION','NBC/DELETION','RDA/DELETION','REG/DELETION','RDA','GDBC','SVM','RBF','PSVM','SVM11','SVM:LIN4','SVM:LIN0','SVM:LIN1','SVM:LIN2','SVM:LIN3','WINNOW'};
	%classifier= {'SVM:RBF'};

	x = randn(N,M);	% data
	c = ([1:N]'>(N/2))+1;	% classlabel
	%w = [ones(1,N/2)/5,ones(1,N/10),zeros(1,4*N/10)];
	w = [];	% no weighting

	x = randn(N,M);
	x = x+c*ones(1,M);	% shift every feature by the class label

	% NOTE(review): the original line breaks were lost in this copy; it is
	% assumed that only the first assignment below is commented out - confirm.
	if 1,
		%x(2:2:N/2,2) = NaN;
		x(2:2:N,2) = NaN;
		x(3,2:2:end) = NaN;
	end;
end;

for k = 1:length(classifier);
	try,
		[R{k},CC{k}] = xval(x, {c,w}, classifier{k});
		fprintf(1,'%8s\t%i\t%5.2f\t%5.2f+-%5.2f\n',classifier{k},sum(R{k}.data(:)),R{k}.ACC*100,R{k}.kappa,R{k}.kappa_se);
		save -v6 debug.mat
	catch,
		% mark the classifier as failed; reported in the summary below
		R{k} = [];
	end;
end;

% summary over all classifiers
for k = 1:length(R)
	if isempty(R{k})
		fprintf(1,'%8s \t failed\n',classifier{k});
	else
		fprintf(1,'%8s\t%i\t%5.2f\t%5.2f+-%5.2f\n',classifier{k},sum(R{k}.data(:)),R{k}.ACC*100,R{k}.kappa,R{k}.kappa_se);
	end;
end

NaN/INDEX0000664002356700235670000000122711736366420012546 0ustar schloeglschloeglnan >> A statistics and machine learning toolbox A statistics and machine learning toolbox for data with and w/o missing values coefficient_of_variation geomean meansq skewness covm cor cov corrcoef harmmean median statistic detrend kurtosis moment std mad naninsttest nantest nansum nanstd nanconv nanfft nanfilter nanfilter1uc normpdf normcdf norminv meandev percentile quantile rankcorr ranks rms sumskipnan var mean sem spearman trimean tpdf tcdf tinv zscore flag_implicit_significance xcovf train_sc test_sc xval classify train_lda_sparse decovm gscatter
mahal cdfplot hist2res fss cat2bin ttest ttest2 xptopen bland_altman cumsumskipnan range NaN/COPYING0000664002356700235670000010451311223123667013004 0ustar schloeglschloegl GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. 
Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. 
Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. 
A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. 
All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. 
When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. 
This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: Copyright (C) This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . NaN/doc/0000775002356700235670000000000012546555135012522 5ustar schloeglschloeglNaN/doc/README.TXT0000664002356700235670000002772312263773327014074 0ustar schloeglschloeglNaN-Tb: A statistics toolbox ------------------------------------------------------------ Copyright (C) 2000-2005,2009,2010,2011,2014 Alois Schloegl FEATURES of the NaN-tb: ----------------------- - statistical toolbox - machine learning and classification toolbox - NaN's are treated as missing values - supports weightening of data - usage of multiple CPU cores - supports DIM argument - less round-off errors using extended double - less but more powerful functions (no nan-FUN needed) - supports unbiased estimation - fixes known bugs - compatible with Matlab and Octave - easy to use - The toolbox is tested with Octave 3.x and Matlab 7.x Currently are implemented: -------------------------- level 1: basic functions (not derived) SUMSKIPNAN SUM is a built-in function and cannot not be replaced, For this reason, a different name (than SUM) had to be chosen. SUMSKIPNAN is central, it implements skipping NaN's, the DIM-argument and returns the number of valid elements, too. 
COVM covariance estimation (several modes) Round-off errors avoided by using internally extended accuracy CUMSUMSKIPNAN Cumulative sum, skipping NaN's DECOVM decomposes the extended covariance matrix into mean and cov XCOVF cross-correlation function FLAG_NANS_OCCURED returns 0 if no NaN's appeared in the input data of the last call to one of the following functions, and 1 otherwise: sumskipnan, covm, center, cor, coefficient of variation, corrcoef, geomean, harmmean, kurtosis, mad, mean, meandev, meansq, moment, nanmean, nanstd, nansum, rms, sem, skewness, statistic, std, var FLAG_IMPLICIT_SKIP_NAN can be used to turn off and on the NaN-skipping behaviour. This can be useful for debugging or for compatibility reasons. FLAG_ACCURACY_LEVEL can be used to increase the accuracy of summations (sumskipnan and covm) at the cost of speed. LOAD_FISHERIRIS loads famous fisher iris data set STR2ARRAY convert string to array - useful to extract numeric data from delimiter files XPTOPEN read and write SAS Transport Format (XPT); reads ARFF and STATA files The following functions are experimental; not all effects of missing values are fully understood. E.g. missing values can cause aliasing, and the effects on bandpass and highpass filters also need to be investigated. 
NANCONV convolution NANCONV2 2-dimensional convolution NANFILTER filter function NANFFT Fourier transform level 2a: derived functions MEAN mean (options: arithmetic, geometric, harmonic) VAR variance STD standard deviation MEDIAN median (currently only for 2-dim matrices) SEM standard error of the mean (does not depend on distribution) TRIMMEAN trimmed mean medAbsDev median absolute deviation MEANSQ mean square RMS root mean square STATISTIC estimates various statistics at once MOMENT moment SKEWNESS skewness KURTOSIS excess * IQR interquartile range MAD mean absolute deviation * RANGE range (max-min) CENTER removes mean ZSCORE normalizes x to zero mean and variance 1 (z = (x-mean)/std) zScoreMedian non-parametric z-score, normalizes x to zero median and 1/(1.483*median absolute deviation) HARMMEAN harmonic mean GEOMEAN geometric mean NANTEST checks whether all functions have been replaced DETREND detrending of data with missing values and non-equidistant sampled data COR correlation matrix COV covariance matrix CORRCOEF correlation coefficient, including rank correlation, significance test and confidence intervals SPEARMAN, RANKCORR Spearman's rank correlation coefficient. They might be replaced by CORRCOEF. 
PARTCORRCOEF partial correlation coefficient RANKS calculates ranks for non-parametric statistics TIEDRANK similar to RANKS, used for compatibility reasons QUANTILE q-th quantile PRCTILE,PERCENTILE p-th percentile TRIMEAN trimean BLAND_ALTMANN Bland-Altman plot ECDF empirical cumulative distribution function CDFPLOT plot empirical cumulative distribution function GSCATTER scatter plot of grouped data NORMPDF normal probability distribution NORMCDF normal cumulative distribution NORMINV inverse of the normal cumulative distribution TPDF student probability distribution TCDF student cumulative distribution TINV inverse of the student cumulative distribution NANSUM, NANSTD fixes for buggy versions included TTEST paired t-test TTEST2 (unpaired) t-test level 2b: classification, cross-validation TRAIN_SC train classifier TEST_SC test classifier CLASSIFY classify data (no cross validation) XVAL classify data with cross validation KAPPA performance evaluation TRAIN_LDA_SPARSE utility function FSS feature subset selection and feature ranking CAT2BIN converts categorical to binary data SVMTRAIN_MEX libSVM-training algorithm ROW_COL_DELETION heuristic to select rows and columns to remove missing values REFERENCE(S): ---------------------------------- [1] http://www.itl.nist.gov/ [2] http://mathworld.wolfram.com/ What is the difference to previous implementations? =================================================== 1) The default behavior of previous implementations is that NaNs in the input data result in NaNs in the output data. In many applications this behavior is not what you want. In this implementation, NaNs are handled as missing values and are skipped. 2) In previous implementations the workaround was using different functions like NANSUM, NANMEAN etc. In this toolbox, the same routines can be applied to data with and without NaNs. This enables more natural (more readable and understandable) applications. 3) SUMSKIPNAN is central to the other functions. 
It implements - the DIMENSION-argument, - handles NaNs as missing values or as exception signal (depending on a hidden FLAG), - and returns the number of valid elements (which are not NaNs) in the second output argument. (Note, NANSUM from Matlab does not support the DIM-argument, and NANSUM(NaN) gives NaN instead of 0); 4) [obsolete] 5) The DIMENSION argument is implemented in most routines. These should work in all Matlab and Octave versions. A workaround for a bug in Octave versions <=2.1.35 is implemented. Also several functions from Matlab have no support for the DIM argument (e.g. SKEWNESS, KURTOSIS, VAR) 6) Compatible to previous Octave implementation MEAN implements also the GEOMETRIC and HARMONIC mean. Handling of some special cases has been removed because its not necessary, anymore. MOMENT implements Mode 'ac' (absolute and/or central) moment as implemented in Octave. 7) Performance increase In most numerical applications, NaN's should be simply skipped. Therefore, it is efficient to skip NaN's in the default case. In case an explicit check for NaN's is necessary, implicit exception handling could be avoided. Eventually the overall performance could increase. 8) More readable code An explicit check for NaN's display the importance of this special case. Therefore, the application program might be more readable. 9) ZSCORE, MAD, HARMMEAN and GEOMEAN DIM-argument and skipping of NaN's implemented. None of these features is implemented in the Matlab versions. 10a) NANMEAN, NANVAR, NANMEDIAN These are not necessary anymore. They are implemented in SUMSKIPNAN, MEAN, VAR, STD and MEDIAN, respectively. 10b) NANSUM, NANSTD These functions are obsolete, too. However, previous implementations do not always provide the expected result. Therefore, a correct version is included for backward compatibility. 11) GPL license Permits to implement useful modifications. 
12) NORMPDF, NORMCDF, NORMINV In the Matlab statistics toolbox V 3.0, NORMPDF, NORMCDF and NORMINV gave incorrect results for SIGMA=0; A similar problem was observed in Octave with NORMAL_INV, NORMAL_PDF, and NORMAL_CDF. The problem is fixed with this version. Furthermore, the check of the input arguments is simpler in this version. 13) TPDF, TCDF, TINV In the Matlab statistics toolbox V3.0(12.1) and V4.0(13), TCDF and TINV do not handle NaNs correctly. TINV returns 0 instead of NaN, TCDF stops with an error message. In Stats-tb V2.2(R11) TINV also has the same problem. For these reasons, the NaN-tb is a bug fix. Furthermore, the check of the input arguments is simpler. Overall, the code becomes cleaner and leaner. 14) NANCONV, NANCONV2, NANFFT, NANFILTER, NANFILTER1UC are signal processing functions for graceful handling of data with missing values. These functions are very experimental, because the behavior in case of data with missing values is not fully investigated. E.g. missing values can cause aliasing, and also the behavior of bandpass and highpass filters is not sufficiently investigated. Therefore, these functions should be used with care. Q: WHY SKIPPING NaN's?: ------------------------ A: Usually, NaN means that the value is not available. This meaning is the most common one, even though many different reasons might cause NaN's. In statistics, NaN's represent missing values; in biosignal processing such missing values might have been caused by some recording error. Other reasons for NaN's are undetermined expressions like 0/0 or inf-inf, data not available, unknown value, not a numeric value, etc. If NaN has the meaning of a missing value, it is only consistent to say that the sum of NaN's should be zero. Similar arguments hold for the other functions. The mean of X is undefined if and only if X contains no numbers. The implementation sum(X)/sum(~isnan(X)) gives 0/0=NaN, which is the desired result. 
The variance of X is undefined if and only if X contains fewer than 2 numbers. In most numerical applications, NaN's should be simply skipped. Therefore, it is efficient to skip NaN's in the default case. In the other cases, the NaN's can still be checked explicitly. This could eventually result in more readable code and in improved performance, too. Q: What if I need to check for NaN's? ------------------------------------- A: You can always check whether there were some skipped NaN's in your data with the command FLAG_NANS_OCCURED(). m = mean(x); if flag_nans_occured() % do your error handling, e.g. error('there were NaN''s in x, ignore m'); end; It's also easy to control the granularity of the checks flag_nans_occured(); % reset flag % do any statistical analysis you want if flag_nans_occured() % check, whether some NaN's occurred. end; Installing the NaN-tb for Octave and Matlab: -------------------------------------------- a) Extract files and save them in /your/directory/structure/to/NaN/ b) Include the path with one of the following commands: addpath('/your/directory/structure/to/NaN/') path('/your/directory/structure/to/NaN/',path) Make sure the functions in the NaN-toolbox are found before the default functions. c) run NANINSTTEST This checks whether the installation was successful. d) Compile mex files: This is useful to improve speed, and is required if you use weighted samples. Check if precompiled binaries are provided. If your platform is not supported, compile the C-Mex-functions using "make". Run NANINSTTEST again to check the stability of the compiled SUMSKIPNAN. 
$Id: README.TXT 12492 2014-01-10 13:34:15Z schloegl $ Copyright (C) 2000-2005,2009,2010,2011,2014 by Alois Schloegl http://pub.ist.ac.at/~schloegl/matlab/NaN/ LICENSE: This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, see <http://www.gnu.org/licenses/>. NaN/doc/INSTALL0000664002356700235670000000471112263773327013557 0ustar schloeglschloegl Installing the NaN-tb for Octave and Matlab: -------------------------------------------- a) Extract files and save them in /your/directory/structure/to/NaN/ b) Include the path with one of the following commands: HOME = getenv('HOME'); % if needed, change path to /your/directory/structure/to/ addpath([HOME,'/NaN/']) addpath([HOME,'/NaN/inst']) addpath([HOME,'/NaN/src']) Make sure the functions in the NaN-toolbox are found before the default functions. The NaN-toolbox contains some functions like mean, var, std, cor, cov, and corrcoef which work equivalently to the original functions - the only exception is that missing values (NaN) are skipped. [You can turn off that behavior by setting flag_implicit_skip_nan(0) ]. Alternatively, you can avoid this by including the directories at the end of the path addpath([HOME,'/NaN/'],'-end') addpath([HOME,'/NaN/inst'],'-end') addpath([HOME,'/NaN/src'],'-end') c) The use of mex-files is recommended for using the full capabilities of the NaN-toolbox. Without the mex-files, SVM- and Liblinear classifiers are not available, and in some cases slower m-functions are used. 
Setup your mex compiler using GCC, the GNU Compiler Collection (or some derivative like MinGW) >> mex -setup Run MAKE from the directory .../NaN/src/ Compiling the mex-files is well tested on Linux, but might not work completely on other platforms. Specifically, on Windows you need to setup gnumex and gcc (typically mingw or cygwin). (covm_mex and sumskipnan_mex are strongly recommended for performance reasons, train.mex and svmtrain_mex are needed when using support vector machines) Precompiled binaries are provided for 32bit Windows with Matlab 7.x (tested with 7.1 and 7.6). Please note, for Matlab 7.2 or earlier, the pre-compiled mex-files need to be renamed to *.dll; Maybe a *.lnk file with the extension *.dll will also do. d) run "naninsttest" from the Octave/Matlab command line prompt >> naninsttest In case of success, You should see the following message: >> naninsttest NANINSTTEST successful - your NaN-tools are correctly installed This checks whether the installation was successful. ----------------------- $Id: INSTALL 12492 2014-01-10 13:34:15Z schloegl $ Copyright (c) 2000-2003,2005,2006,2009,2010,2011,2014 by Alois Schloegl This is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ NaN/inst/0000775002356700235670000000000012546555135012732 5ustar schloeglschloeglNaN/inst/kappa.m0000664002356700235670000001307411715164711014202 0ustar schloeglschloeglfunction [kap,se,H,z,p0,SA,R]=kappa(d,c,arg3,w) % KAPPA estimates Cohen's kappa coefficient % and related statistics % % [...] = kappa(d1,d2); % NaN's are handled as missing values and are ignored % [...] = kappa(d1,d2,'notIgnoreNAN'); % NaN's are handled as just another Label. % [kap,sd,H,z,ACC,sACC,MI] = kappa(...); % X = kappa(...); % % d1 data of scorer 1 % d2 data of scorer 2 % % kap Cohen's kappa coefficient point % se standard error of the kappa estimate % H Concordance matrix, i.e. 
confusion matrix % z z-score % ACC overall agreement (accuracy) % sACC specific accuracy % MI Mutual information or transfer information (in [bits]) % X is a struct containing all the fields above % For two classes, a number of additional summary statistics including % TPR, FPR, FDR, PPV, NPF, F1, dprime, Matthews Correlation coefficient (MCC) or % Phi coefficient (PHI=MCC), Specificity and Sensitivity % are provided. Note, the positive category must the larger label (in d and c), otherwise % the confusion matrix becomes transposed and the summary statistics are messed up. % % % Reference(s): % [1] Cohen, J. (1960). A coefficient of agreement for nominal scales. Educational and Psychological Measurement, 20, 37-46. % [2] J Bortz, GA Lienert (1998) Kurzgefasste Statistik f|r die klassische Forschung, Springer Berlin - Heidelberg. % Kapitel 6: Uebereinstimmungsmasze fuer subjektive Merkmalsurteile. p. 265-270. % [3] http://www.cmis.csiro.au/Fiona.Evans/personal/msc/html/chapter3.html % [4] Kraemer, H. C. (1982). Kappa coefficient. In S. Kotz and N. L. Johnson (Eds.), % Encyclopedia of Statistical Sciences. New York: John Wiley & Sons. % [5] http://ourworld.compuserve.com/homepages/jsuebersax/kappa.htm % [6] http://en.wikipedia.org/wiki/Receiver_operating_characteristic % $Id: kappa.m 9608 2012-02-10 09:56:25Z schloegl $ % Copyright (c) 1997-2006,2008,2009,2011 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % % BioSig is free software: you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation, either version 3 of the License, or % (at your option) any later version. % % BioSig is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. 
% % You should have received a copy of the GNU General Public License % along with BioSig. If not, see . mode.ignoreNAN = 1; kk = []; if nargin>2 if ischar(arg3) if strcmpi(arg3,'notIgnoreNAN') mode.ignoreNAN = 0; end else kk = arg3; end end; if nargin<4 w = []; end; if nargin>1, d = d(:); c = c(:); tmp = [d;c]; maxtmp = max(tmp); tmp(isnan(tmp)) = maxtmp+1; [X.Label,i,j] = unique(tmp); c = j(1+numel(d):end); d = j(1:numel(d)); kk = max(j); maxCLASS = kk - any(tmp>maxtmp); if mode.ignoreNAN, if any(j > maxCLASS) % fprintf(2,'Warning KAPPA: some elements are NaN. These are handled as missing values and are ignored.\n'); % fprintf(2,'If NaN should be handled as just another label, use kappa(..,''notIgnoreNaN'').\n'); ix = find((c<=maxCLASS) & (d<=maxCLASS)); d = d(ix); c=c(ix); if ~isempty(w), w = w(ix); end; kk = kk - 1; end; X.Label(X.Label>maxtmp) = []; else X.Label(X.Label>maxtmp) = NaN; end; if isempty(w) H = full( sparse (d, c, 1, kk, kk) ); elseif ~isempty(w), H = full( sparse (d, c, w, kk, kk) ); end; else X.Label = 1:min(size(d)); H = d(X.Label,X.Label); end; s = warning; warning('off'); N = sum(H(:)); p0 = sum(diag(H))/N; %accuracy of observed agreement, overall agreement %OA = sum(diag(H))/N); p_i = sum(H,1); pi_ = sum(H,2)'; SA = 2*diag(H)'./(p_i+pi_); % specific agreement pe = (p_i*pi_')/(N*N); % estimate of change agreement px = sum(p_i.*pi_.*(p_i+pi_))/(N*N*N); %standard error kap = (p0-pe)/(1-pe); sd = sqrt((pe+pe*pe-px)/(N*(1-pe*pe))); %standard error se = sqrt((p0+pe*pe-px)/N)/(1-pe); if ~isreal(se), z = NaN; else z = kap/se; end if ((1 < nargout) && (nargout<7)) warning(s); return; end; % Nykopp's entropy pwi = sum(H,2)/N; % p(x_i) pwj = sum(H,1)/N; % p(y_j) pji = H./repmat(sum(H,2),1,size(H,2)); % p(y_j | x_i) R = - sumskipnan(pwj.*log2(pwj)) + sumskipnan(pwi'*(pji.*log2(pji))); if (nargout>1), return; end; X.kappa = kap; X.kappa_se = se; X.data = H; X.H = X.data; X.z = z; X.ACC = p0; X.sACC = SA; X.MI = R; X.datatype = 'confusion'; if 
length(H)==2, % see http://en.wikipedia.org/wiki/Receiver_operating_characteristic % Note that the confusion matrix used here is has positive values in % the 2nd row and column, moreover the true values are indicated by % rows (transposed). Thus, in summary H(1,1) and H(2,2) are exchanged % as compared to the wikipedia article. X.TP = H(2,2); X.TN = H(1,1); X.FP = H(1,2); X.FN = H(2,1); X.FNR = H(2,1) / sum(H(2,:)); X.FPR = H(1,2) / sum(H(1,:)); X.TPR = H(2,2) / sum(H(2,:)); X.PPV = H(2,2) / sum(H(:,2)); X.NPV = H(1,1) / sum(H(:,1)); X.FDR = H(1,2) / sum(H(:,2)); X.MCC = det(H) / sqrt(prod([sum(H), sum(H')])); X.PHI = X.MCC; X.F1 = 2 * X.TP / (sum(H(2,:)) + sum(H(:,2))); X.Sensitivity = X.TPR; %% hit rate, recall X.Specificity = 1 - X.FPR; X.Precision = X.PPV; X.dprime = norminv(X.TPR) - norminv(X.FDR); end; kap = X; warning(s); NaN/inst/decovm.m0000664002356700235670000000505111601145313014346 0ustar schloeglschloeglfunction [mu,sd,COV,xc,M,R2]=decovm(XCN,NN) % decompose extended covariance matrix into mean (mu), % standard deviation, the (pure) Covariance (COV), % correlation (xc) matrix and the correlation coefficients R2. % NaN's are condsidered as missing values. % [mu,sd,COV,xc,N,R2]=decovm(ECM[,NN]) % % ECM is the extended covariance matrix % NN is the number of elements, each estimate (in ECM) is based on % % see also: MDBC, COVM, R2 % $Id: decovm.m 2140 2009-07-02 12:03:55Z schloegl $ % Copyright (c) 1999-2002,2009 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or % modify it under the terms of the GNU General Public License % as published by the Free Software Foundation; either version 3 % of the License, or (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. [r,c]=size(XCN); if r~=c, fprintf(2,'Warning DECOVM: input argument is not a square matrix\n'); XCN = ecovm(XCN); c = c + 1; else M = XCN(1,1); if nargin<2, XCN = XCN/(XCN(1,1)); else %if nargin==2 XCN = XCN./(NN); end; if any(isnan(XCN(:))), warning('DECOVM: Extended Covariance Matrix should not contain NaN''s'); end; if 0, %det(XCN)<0; % check removed for performance reasons warning('DECOVM: Extended Covariance Matrix must be non-negative definite'); end; end; mu = XCN(1,2:c); COV = XCN(2:c,2:c) - mu'*mu; sd = sqrt(diag(COV))'; if nargout<4, return; end; xc = COV./(sd'*sd); M = XCN(1,1); if nargout<6, return; end; R2 = xc.*xc; return; mu=XCN(2:N,1)/XCN(1,1); COV=(XCN(2:N,2:N)/XCN(1,1)-XCN(2:N,1)*XCN(1,2:N)/XCN(1,1)^2); sd=sqrt(diag(COV)); xc=COV./(sd*sd'); % function [ECM] = ecovm(signal); % Generates extended Covariance matrix, % ECM= [l signal]'*[l signal]; % l is a matching column of 1's % ECM is additive, i.e. it can be applied to subsequent blocks and summed up afterwards % [ECM1] = ecovm(s1); % [ECM2] = ecovm(s1); % [ECM] = ecovm([s1;s2]); % ECM1+ECM2==ECM; % % SS=sum(signal); ECM=[[size(signal,1),SS];[SS',signal'*signal]]; NaN/inst/ttest.m0000664002356700235670000000763011553522126014250 0ustar schloeglschloeglfunction [h, pval, ci, stats] = ttest (x, m, alpha, tail, vartype, DIM) % TTEST (paired) t-test % For a sample X from a normal distribution with unknown mean and % variance, perform a t-test of the null hypothesis `mean (X) == M'. % Under the null, the test statistic T follows a Student % distribution with `DF = length (X) - 1' degrees of freedom. % % TTEST treads NaNs as "Missing values" and ignores these. % % H = ttest(x,m) % tests Null-hypothesis that mean of x is m. 
% H = ttest(x,y) % size of x and size of y must match, it is tested whether the % difference x-y is significantly different to m=0; % H = ttest(x,y,alpha) % H = ttest(x,y,alpha,tail) % H = ttest(x,y,alpha,tail,DIM) % [H,PVAL] = ttest(...) % % H=1 indicates a rejection of the Null-hypothesis at a significance % level of alpha (default alpha = 0.05). % % With the optional argument string TAIL, the alternative of interest % can be selected. If TAIL is '!=' or '<>' or 'both', the null is tested % against the two-sided Alternative `mean (X) ~= mean (Y)'. If TAIL % is '>' or 'right', the one-sided Alternative `mean (X) > mean (Y)' is used. % Similarly for '<' or 'left', the one-sided Alternative `mean (X) < mean % (Y)' is used. The default is the two-sided case. % % H returns whether the Null-Hypotheses must be rejected. % The p-value of the test is returned in PVAL. % % TTEST works on the first non-singleton dimension or on DIM. % % If no output argument is given, the p-value of the test is % displayed. % %%% not supported yet % [h,p,ci] = ttest(...) % [h,p,ci,stats] = ttest(...) % $Id$ % Copyright (C) 1995, 1996, 1997, 1998, 2000, 2002, 2005, 2006, 2007 % Kurt Hornik % Copyright (C) 2010 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software: you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation, either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program. If not, see . 
if ((nargin < 2) || (nargin > 5) || nargout > 4) print_usage ; end if (nargin == 2) alt = '~='; end if (nargin < 3) || isempty(alpha) alpha = .05; end if (nargin < 4) || isempty(tail) tail = '~='; end if (~ ischar (tail)) error ('ttest: tail must be a string'); end if (nargin < 5) || isempty(vartype) vartype = 'equal'; end if ~strcmp(vartype,'equal') error ('test: vartype not supported') end if nargin<6, DIM = find(size(x)>1,1); end; if isempty(DIM), DIM=1; end; szx = size(x); szm = size(m); szx(DIM) = 1; szm(DIM) = 1; if size(m,DIM)==1 ; elseif size(x,DIM) == size(m,DIM) x = x-m; m = zeros(szm); else error ('ttest: dimension of X and Y do not fit'); end [S, N] = sumskipnan(x, DIM); stats.df = N - 1; stats.sd = std (x); stats.tstat = sqrt (N) .* (S./N - m) ./ stats.sd; cdf = tcdf (stats.tstat, stats.df); if (strcmp (tail, '~=') || strcmp (tail, '!=') || strcmp (tail, '<>')) || strcmp(tail,'both'), pval = 2 * min (cdf, 1 - cdf); elseif strcmp (tail, '>') || strcmp(tail,'right'), pval = 1 - cdf; elseif strcmp (tail, '<') || strcmp(tail,'left'), pval = cdf; else error ('ttest: option %s not recognized', tail); end h = pval < alpha; if (nargout == 0) fprintf(1,' pval: %g\n', pval); end NaN/inst/sem.m0000664002356700235670000000363611553522126013673 0ustar schloeglschloeglfunction [SE,M]=sem(x,DIM, W) % SEM calculates the standard error of the mean % % [SE,M] = SEM(x [, DIM [,W]]) % calculates the standard error (SE) in dimension DIM % the default DIM is the first non-single dimension % M returns the mean. % Can deal with complex data, too. % % DIM dimension % 1: SEM of columns % 2: SEM of rows % N: SEM of N-th dimension % default or []: first DIMENSION, with more than 1 element % W weights to compute weighted mean and s.d. (default: []) % if W=[], all weights are 1. 
% number of elements in W must match size(x,DIM) % % features: % - can deal with NaN's (missing values) % - weighting of data % - dimension argument % - compatible to Matlab and Octave % % see also: SUMSKIPNAN, MEAN, VAR, STD % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . % Copyright (C) 2000-2003,2008,2009 by Alois Schloegl % $Id: sem.m 8223 2011-04-20 09:16:06Z schloegl $ % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ if nargin>2, [S,N,SSQ] = sumskipnan(x,DIM,W); elseif nargin>1, [S,N,SSQ] = sumskipnan(x,DIM); else [S,N,SSQ] = sumskipnan(x); end M = S./N; SE = (SSQ.*N - real(S).^2 - imag(S).^2)./(N.*N.*(N-1)); SE(SE<=0) = 0; % prevent negative value caused by round-off error SE = sqrt(real(SE)); NaN/inst/load_fisheriris.m0000664002356700235670000000372311601145313016243 0ustar schloeglschloegl% LOAD_FISHERIRIS % loads famous iris data set from Fisher, 1936 [1]. % % References: % [1] Fisher,R.A. "The use of multiple measurements in taxonomic problems" % Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to Mathematical Statistics" (John Wiley, NY, 1950). % [2] Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis. % (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218. 
% $Id$ % Copyright (C) 2009,2010 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or % modify it under the terms of the GNU General Public License % as published by the Free Software Foundation; either version 3 % of the License, or (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. if exist('OCTAVE_VERSION','builtin') if ~exist('iris.data','file') if strncmp(computer,'PCWIN',5) fprintf(1,'Download http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data and save in local directory %s\nPress any key to continue ...\n',pwd); else unix('wget http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'); end; end; tmp = fopen('iris.data'); species=fread(tmp,[1,inf],'uint8=>char'); fclose(tmp); [meas,tmp,species]=str2double(species,','); meas = meas(:,1:4); species = species(:,5); else load fisheriris; end; NaN/inst/var.m0000664002356700235670000000513411553522126013672 0ustar schloeglschloeglfunction y=var(x,opt,DIM,W) % VAR calculates the variance. % % y = var(x [, opt[, DIM]]) % calculates the variance in dimension DIM % the default DIM is the first non-single dimension % % opt 0: normalizes with N-1 [default] % 1: normalizes with N % DIM dimension % 1: VAR of columns % 2: VAR of rows % N: VAR of N-th dimension % default or []: first DIMENSION, with more than 1 element % W weights to compute weighted variance (default: []) % if W=[], all weights are 1. 
% number of elements in W must match size(x,DIM) % % usage: % var(x) % var(x, opt, DIM) % var(x, [], DIM) % var(x, W, DIM) % var(x, opt, DIM, W) % % features: % - can deal with NaN's (missing values) % - weighting of data % - dimension argument % - compatible to Matlab and Octave % % see also: MEANSQ, SUMSQ, SUMSKIPNAN, MEAN, RMS, STD, % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . % $Id: var.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2000-2003,2006,2009,2010 by Alois Schloegl % This is part of the NaN-toolbox for Octave and Matlab % http://pub.ist.ac.at/~schloegl/matlab/NaN/ if nargin<3, DIM = []; end; if nargin==1, W = []; opt = []; elseif any(nargin==[2,3]) if (numel(opt)<2), W = []; else W = opt; opt = []; end; elseif (nargin==4) && (numel(opt)<2) && (numel(DIM)<2), ; else fprintf(1,'Error VAR: incorrect usage\n'); help var; return; end; if isempty(opt), opt = 0; end; if isempty(DIM), DIM = find(size(x)>1,1); if isempty(DIM), DIM=1; end; end; [y,n,ssq] = sumskipnan(x,DIM,W); if all(ssq(:).*n(:) > 2*(y(:).^2)), %% rounding error is neglectable y = ssq - y.*y./n; else %% rounding error is not neglectable szx = size(x); szy = size(y); if length(szy) % This functions is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; 
either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . if nargin==2, DIM=[]; opt=[]; elseif nargin==3, DIM=[]; elseif nargin==4, else fprintf('Error MOMENT: invalid number of arguments\n'); return; end; if p<=0; fprintf('Error MOMENT: invalid model order p=%f\n',p); return; end; if isnumeric(opt) || ~isnumeric(DIM), tmp = DIM; DIM = opt; opt = tmp; end; if isempty(opt), opt='r'; end; if isempty(DIM), DIM = find(size(i)>1,1); if isempty(DIM), DIM=1; end; end; N = nan; if isstruct(i), if isfield(i,'HISTOGRAM'), sz = size(i.H)./size(i.X); X = repmat(i.X,sz); if any(opt=='c'), N = sumskipnan(i.H,1); % N N = max(N-1,0); % for unbiased estimation S = sumskipnan(i.H.*X,1); % sum X = X - repmat(S./N, size(X)./size(S)); % remove mean end; if any(opt=='a'), X = abs(X); end; [M,n] = sumskipnan(X.^p.*i.H,1); else warning('invalid datatype') end; else if any(opt=='c'), [S,N] = sumskipnan(i,DIM); % gemerate N and SUM N = max(N-1,0); % for unbiased estimation i = i - repmat(S./N, size(i)./size(S)); % remove mean end; if any(opt=='a'), i = abs(i); end; [M,n] = sumskipnan(i.^p,DIM); end; if isnan(N), N=n; end; M = M./N; NaN/inst/nanfilter1uc.m0000664002356700235670000000330511601145313015464 0ustar schloeglschloeglfunction [x,z] = nanfilter1uc(uc,x,z); % NANFILTER1UC is an adaptive filter for data with missing values encoded as NaN. % % [Y,Z] = nanfilter1uc(uc,X [, Z]); % % if X contains no missing data, NANFILTER behaves like FILTER(uc,[1,uc-1],X[,Z]). 
% % see also: FILTER, NANFILTER, SUMSKIPNAN % $Id$ % Copyright (C) 2010,2011 by Alois Schloegl % This function is part of the NaN-toolbox available at % http://pub.ist.ac.at/~schloegl/matlab/NaN/ and % http://octave.svn.sourceforge.net/viewvc/octave/trunk/octave-forge/extra/NaN/inst/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA na = 2; %length(A); nb = 2; %length(B); if any(size(x)==1) nc = 1; else nc = size(x,2); end; acN = zeros(1,nc); if nargin<3, z = zeros(1,nc); end; acc = NaN(1,nc); for k = 1:size(x,1), ix = isnan(x(k,:)); acN = acN.*ix+1; UC1 = ((1-uc).^acN); acc(~ix) = (1-UC1(~ix)) .* x(k,~ix) + z(~ix); % / A{1}; ix = isnan(acc); acc(ix) = x(k,ix); z = (1-uc) * acc; x(k,:) = acc; end; NaN/inst/row_col_deletion.m0000664002356700235670000001032211601145313016415 0ustar schloeglschloegl
function [rix,cix] = row_col_deletion(d,c,w)
% ROW_COL_DELETION selects the rows and columns for removing any missing values.
%    A heuristic based on maximizing the number of remaining sample values
%    is used. In other words, if there are more rows than columns, it is
%    more likely that a row-wise deletion will be applied and vice versa.
%
%    [rix,cix] = row_col_deletion(d)
%    [rix,cix] = row_col_deletion(d,c,w)
%
% Input:
%    d       data (each row is a sample, each column a feature)
%    c       classlabels (not really used) [OPTIONAL]
%            samples with c==NaN are never selected
%    w       weight for each sample vector [OPTIONAL]
%            samples with w==NaN are never selected
% Output:
%    rix     selected samples (column vector of row indices into d)
%    cix     selected columns (row vector of column indices into d)
%
%   d(rix,cix) does not contain any NaN's i.e. missing values
%
% see also: TRAIN_SC, TEST_SC

%    $Id$
%    Copyright (C) 2009,2010 by Alois Schloegl
%    This function is part of the NaN-toolbox
%    http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 3
% of the License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.

if nargin < 3, w = []; end;

% keep(k) is true if sample k has a valid class label and a valid weight.
% The mask is built element-wise; reducing it with any() would collapse it
% to a single scalar and select the wrong rows.
keep = true(size(d,1),1);
if (nargin > 1) && ~isempty(c),
    keep = keep & ~isnan(c(:));
end;
if ~isempty(w),
    w    = w(:);
    keep = keep & ~isnan(w);
    w    = w(keep);
end;
keep = find(keep);      % original row numbers of the samples that are kept
d    = d(keep,:);       %% ignore samples with invalid c or w

% The size must be known before it is used in the tolerance below.
[N,M] = size(d);
tol   = max(log2(N),1)*eps;     % never 0, so a single-row input can terminate

if isempty(d),
    % nothing that could contain a missing value
    rix = keep;
    cix = 1:M;
    return;
end;

% NOTE(review): besides negative and non-finite weights, this check also rejects
% weights that do not sum up to 1, although the message does not say so and the
% weights are re-normalized below anyway - confirm whether that is intended.
if ~isempty(w) && (abs(sum(w)-1) > tol || any(w<0) || any(~isfinite(w)))
    error('weight vector must contain only non-negative and finite values');
end;

%% a mix of row- and column-wise deletion is possible:
%% repeatedly remove the row or column with the smallest fraction of valid
%% values until the remaining block contains no NaN.
%% (The former "either rows or columns" heuristic was unreachable and is removed.)
rix = ones(N,1);
cix = ones(1,M);
while 1;
    e = ~isnan(d(rix>0,cix>0));
    if ~isempty(w),
        colCost = mean(e, 1, w(rix>0)/sum(w(rix>0)))';  % cost of deleting columns
    else
        colCost = mean(e, 1)';  % cost of deleting columns
    end;
    rowCost = mean(e, 2);       % cost of deleting rows

    [tmp,ix] = sort([colCost; rowCost]);

    % stopping criterion: everything left is valid, or nothing is left
    if isempty(tmp) || (abs(tmp(1)-1) < tol), break; end;

    if (numel(tmp) > 1) && (diff(tmp(1:2))==0),
        warning('row/col deletion: arbitrary selection [%i,%i]',ix(1:2));
    end;
    ix = ix(1);
    if (ix<=sum(cix))
        % the first sum(cix) entries of the cost vector are the remaining columns
        tmp = find(cix>0);
        cix(tmp(ix)) = 0;
    else
        tmp = find(rix>0);
        rix(tmp(ix-sum(cix))) = 0;
    end;
end;
rix = keep(rix>0);      % map back to row numbers of the input d
cix = find(cix);
NaN/inst/covm.m0000664002356700235670000001601212540557475014057 0ustar schloeglschloeglfunction [CC,NN] = covm(X,Y,Mode,W) % COVM generates covariance matrix % X and Y can contain missing values encoded with NaN. % NaN's are skipped, NaN do not result in a NaN output. 
% The output gives NaN only if there are insufficient input data % % COVM(X,Mode); % calculates the (auto-)correlation matrix of X % COVM(X,Y,Mode); % calculates the crosscorrelation between X and Y % COVM(...,W); % weighted crosscorrelation % % Mode = 'M' minimum or standard mode [default] % C = X'*X; or X'*Y correlation matrix % % Mode = 'E' extended mode % C = [1 X]'*[1 X]; % l is a matching column of 1's % C is additive, i.e. it can be applied to subsequent blocks and summed up afterwards % the mean (or sum) is stored on the 1st row and column of C % % Mode = 'D' or 'D0' detrended mode % the mean of X (and Y) is removed. If combined with extended mode (Mode='DE'), % the mean (or sum) is stored in the 1st row and column of C. % The default scaling is factor (N-1). % Mode = 'D1' is the same as 'D' but uses N for scaling. % % C = covm(...); % C is the scaled by N in Mode M and by (N-1) in mode D. % [C,N] = covm(...); % C is not scaled, provides the scaling factor N % C./N gives the scaled version. % % see also: DECOVM, XCOVF % $Id: covm.m 12826 2015-06-18 15:09:49Z schloegl $ % Copyright (C) 2000-2005,2009 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . 
global FLAG_NANS_OCCURED; if nargin<3, W = []; if nargin==2, if isnumeric(Y), Mode='M'; else Mode=Y; Y=[]; end; elseif nargin==1, Mode = 'M'; Y = []; elseif nargin==0, error('Missing argument(s)'); end; elseif (nargin==3) && isnumeric(Y) && ~isnumeric(Mode); W = []; elseif (nargin==3) && ~isnumeric(Y) && isnumeric(Mode); W = Mode; Mode = Y; Y = []; elseif (nargin==4) && ~isnumeric(Mode) && isnumeric(Y); ; %% ok else error('invalid input arguments'); end; Mode = upper(Mode); [r1,c1]=size(X); if ~isempty(Y) [r2,c2]=size(Y); if r1~=r2, error('X and Y must have the same number of observations (rows).'); end; else [r2,c2]=size(X); end; persistent mexFLAG2; persistent mexFLAG; if isempty(mexFLAG2) mexFLAG2 = exist('covm_mex','file'); end; if isempty(mexFLAG) mexFLAG = exist('sumskipnan_mex','file'); end; if ~isempty(W) W = W(:); if (r1~=numel(W)) error('Error COVM: size of weight vector does not fit number of rows'); end; %w = spdiags(W(:),0,numel(W),numel(W)); %nn = sum(W(:)); nn = sum(W); else nn = r1; end; if mexFLAG2 && mexFLAG && ~isempty(W), %% the mex-functions here are much slower than the m-scripts below %% however, the mex-functions support weighting of samples. if isempty(FLAG_NANS_OCCURED), %% mex-files require that FLAG_NANS_OCCURED is not empty, %% otherwise, the status of NAN occurence can not be returned. 
FLAG_NANS_OCCURED = logical(0); % default value end; if any(Mode=='D') || any(Mode=='E'), [S1,N1] = sumskipnan(X,1,W); if ~isempty(Y) [S2,N2] = sumskipnan(Y,1,W); else S2 = S1; N2 = N1; end; if any(Mode=='D'), % detrending mode X = X - ones(r1,1)*(S1./N1); if ~isempty(Y) Y = Y - ones(r1,1)*(S2./N2); end; end; end; if issparse(X) || issparse(Y), fprintf(2,'sumskipnan: sparse matrix converted to full matrix\n'); X=full(X); Y=full(Y); end; [CC,NN] = covm_mex(real(X), real(Y), FLAG_NANS_OCCURED, W); %% complex matrices if ~isreal(X) && ~isreal(Y) [iCC,inn] = covm_mex(imag(X), imag(Y), FLAG_NANS_OCCURED, W); CC = CC + iCC; end; if isempty(Y) Y = X; end; if ~isreal(X) [iCC,inn] = covm_mex(imag(X), real(Y), FLAG_NANS_OCCURED, W); CC = CC - i*iCC; end; if ~isreal(Y) [iCC,inn] = covm_mex(real(X), imag(Y), FLAG_NANS_OCCURED, W); CC = CC + i*iCC; end; if any(Mode=='D') && ~any(Mode=='1'), % 'D1' NN = max(NN-1,0); end; if any(Mode=='E'), % extended mode NN = [nn, N2; N1', NN]; CC = [nn, S2; S1', CC]; end; elseif ~isempty(W), error('Error COVM: weighted COVM requires sumskipnan_mex and covm_mex but it is not available'); %% weighted covm without mex-file support %% this part is not working. elseif ~isempty(Y), if (~any(Mode=='D') && ~any(Mode=='E')), % if Mode == M NN = real(X==X)'*real(Y==Y); FLAG_NANS_OCCURED = any(NN(:) % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. 
% % You should have received a copy of the GNU General Public License % along with this program; If not, see . if nargin==2, Q = quantile(Y,q/100); elseif nargin==3, Q = quantile(Y,q/100,DIM); else help percentile end; NaN/inst/meandev.m0000664002356700235670000000433111553522126014517 0ustar schloeglschloegl
function R = meandev(i,DIM)
% MEANDEV estimates the Mean deviation
% (note that according to [1,2] this is the mean deviation;
% not the mean absolute deviation)
%
% y = meandev(x,DIM)
%   calculates the mean deviation of x in dimension DIM
%
% DIM   dimension
%       1: STATS of columns
%       2: STATS of rows
%       default or []: first DIMENSION, with more than 1 element
%
% features:
% - can deal with NaN's (missing values)
% - dimension argument
% - compatible to Matlab and Octave
%
% see also: SUMSKIPNAN, VAR, STD, MAD
%
% REFERENCE(S):
% [1] http://mathworld.wolfram.com/MeanDeviation.html
% [2] L. Sachs, "Applied Statistics: A Handbook of Techniques", Springer-Verlag, 1984, page 253.
% [3] http://mathworld.wolfram.com/MeanAbsoluteDeviation.html
% [4] Kenney, J. F. and Keeping, E. S. "Mean Absolute Deviation." §6.4 in Mathematics of Statistics, Pt. 1, 3rd ed. Princeton, NJ: Van Nostrand, pp. 76-77 1962.

%    This program is free software; you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation; either version 2 of the License, or
%    (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program; If not, see http://www.gnu.org/licenses/.

%    $Id: meandev.m 8223 2011-04-20 09:16:06Z schloegl $
%    Copyright (C) 2000-2002,2010 by Alois Schloegl
%    This function is part of the NaN-toolbox for Octave and Matlab
%    http://pub.ist.ac.at/~schloegl/matlab/NaN/

if nargin==1,
    % no dimension given: use the first one with more than one element
    DIM = find(size(i)>1,1);
    if isempty(DIM),
        DIM = 1;
    end;
end;

% mean along DIM, ignoring NaN's
[total,count] = sumskipnan(i,DIM);
avg = total./count;

% deviations from the mean; avg is expanded along DIM to the size of the data
deviation = i - repmat(avg, size(i)./size(total));

% sum of the absolute deviations and number of valid samples
[absTotal,count] = sumskipnan(abs(deviation),DIM);

% unbiased estimate: scaling by (n-1);
% in case of n=0 and n=1 the denominator is 0
R = absTotal ./ max(count-1,0);
NaN/inst/corrcoef.m0000664002356700235670000003444711672347406014725 0ustar schloeglschloeglfunction [R,sig,ci1,ci2,nan_sig] = corrcoef(X,Y,varargin) % CORRCOEF calculates the correlation matrix from pairwise correlations. % The input data can contain missing values encoded with NaN. % Missing data (NaN's) are handled by pairwise deletion [15]. % In order to avoid possible pitfalls, use case-wise deletion or % or check the correlation of NaN's with your data (see below). % A significance test for testing the Hypothesis % 'correlation coefficient R is significantly different to zero' % is included. % % [...] = CORRCOEF(X); % calculates the (auto-)correlation matrix of X % [...] = CORRCOEF(X,Y); % calculates the crosscorrelation between X and Y % % [...] = CORRCOEF(..., Mode); % Mode='Pearson' or 'parametric' [default] % gives the correlation coefficient % also known as the 'product-moment coefficient of correlation' % or 'Pearson''s correlation' [1] % Mode='Spearman' gives 'Spearman''s Rank Correlation Coefficient' % This replaces SPEARMAN.M % Mode='Rank' gives a nonparametric Rank Correlation Coefficient % This is the "Spearman rank correlation with proper handling of ties" % This replaces RANKCORR.M % % [...] = CORRCOEF(..., param1, value1, param2, value2, ... 
); % param value % 'Mode' type of correlation % 'Pearson','parametric' % 'Spearman' % 'rank' % 'rows' how do deal with missing values encoded as NaN's. % 'complete': remove all rows with at least one NaN % 'pairwise': [default] % 'alpha' 0.01 : significance level to compute confidence interval % % [R,p,ci1,ci2,nansig] = CORRCOEF(...); % R is the correlation matrix % R(i,j) is the correlation coefficient r between X(:,i) and Y(:,j) % p gives the significance of R % It tests the null hypothesis that the product moment correlation coefficient is zero % using Student's t-test on the statistic t = r*sqrt(N-2)/sqrt(1-r^2) % where N is the number of samples (Statistics, M. Spiegel, Schaum series). % p > alpha: do not reject the Null hypothesis: 'R is zero'. % p < alpha: The alternative hypothesis 'R is larger than zero' is true with probability (1-alpha). % ci1 lower (1-alpha) confidence interval % ci2 upper (1-alpha) confidence interval % If no alpha is provided, the default alpha is 0.01. This can be changed with function flag_implicit_significance. % nan_sig p-value whether H0: 'NaN''s are not correlated' could be correct % if nan_sig < alpha, H1 ('NaNs are correlated') is very likely. % % The result is only valid if the occurence of NaN's is uncorrelated. In % order to avoid this pitfall, the correlation of NaN's should be checked % or case-wise deletion should be applied. % Case-Wise deletion can be implemented % ix = ~any(isnan([X,Y]),2); % [...] 
= CORRCOEF(X(ix,:),Y(ix,:),...); % % Correlation (non-random distribution) of NaN's can be checked with % [nan_R,nan_sig]=corrcoef(X,isnan(X)) % or [nan_R,nan_sig]=corrcoef([X,Y],isnan([X,Y])) % or [R,p,ci1,ci2] = CORRCOEF(...); % % Further recommandation related to the correlation coefficient: % + LOOK AT THE SCATTERPLOTS to make sure that the relationship is linear % + Correlation is not causation because % it is not clear which parameter is 'cause' and which is 'effect' and % the observed correlation between two variables might be due to the action of other, unobserved variables. % % see also: SUMSKIPNAN, COVM, COV, COR, SPEARMAN, RANKCORR, RANKS, % PARTCORRCOEF, flag_implicit_significance % % REFERENCES: % on the correlation coefficient % [ 1] http://mathworld.wolfram.com/CorrelationCoefficient.html % [ 2] http://www.geography.btinternet.co.uk/spearman.htm % [ 3] Hogg, R. V. and Craig, A. T. Introduction to Mathematical Statistics, 5th ed. New York: Macmillan, pp. 338 and 400, 1995. % [ 4] Lehmann, E. L. and D'Abrera, H. J. M. Nonparametrics: Statistical Methods Based on Ranks, rev. ed. Englewood Cliffs, NJ: Prentice-Hall, pp. 292, 300, and 323, 1998. % [ 5] Press, W. H.; Flannery, B. P.; Teukolsky, S. A.; and Vetterling, W. T. Numerical Recipes in FORTRAN: The Art of Scientific Computing, 2nd ed. Cambridge, England: Cambridge University Press, pp. 
634-637, 1992 % [ 6] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html % on the significance test of the correlation coefficient % [11] http://www.met.rdg.ac.uk/cag/STATS/corr.html % [12] http://www.janda.org/c10/Lectures/topic06/L24-significanceR.htm % [13] http://faculty.vassar.edu/lowry/ch4apx.html % [14] http://davidmlane.com/hyperstat/B134689.html % [15] http://www.statsoft.com/textbook/stbasic.html%Correlations % others % [20] http://www.tufts.edu/~gdallal/corr.htm % [21] Fisher transformation http://en.wikipedia.org/wiki/Fisher_transformation % $Id: corrcoef.m 9387 2011-12-15 10:42:14Z schloegl $ % Copyright (C) 2000-2004,2008,2009,2011 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software: you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation, either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program. If not, see . % Features: % + handles missing values (encoded as NaN's) % + pairwise deletion of missing data % + checks independence of missing values (NaNs) % + parametric and non-parametric (rank) correlation % + Pearson's correlation % + Spearman's rank correlation % + Rank correlation (non-parametric, Spearman rank correlation with proper handling of ties) % + is fast, using an efficient algorithm O(n.log(n)) for calculating the ranks % + significance test for null-hypthesis: r=0 % + confidence interval included % - rank correlation works for cell arrays, too (no check for missing values). 
%    + compatible with Octave and Matlab

global FLAG_NANS_OCCURED;

NARG = nargout;     % needed because nargout is not reentrant in Octave, and corrcoef is recursive
mode = [];          % struct with the param/value options ('mode','rows','alpha')
Mode = [];          % type of correlation; stays empty if only other params are given

if nargin==1
    Y = [];
    Mode = 'Pearson';
elseif nargin==0
    fprintf(2,'Error CORRCOEF: Missing argument(s)\n');
    return;
elseif nargin>1
    if ischar(Y)
        % corrcoef(X, Mode) or corrcoef(X, param1, value1, ...)
        varg = [{Y},varargin];
        Y = [];
    else
        varg = varargin;
    end;

    if length(varg)<1,
        Mode = 'Pearson';
    elseif length(varg)==1,
        Mode = varg{1};
    else
        for k = 2:2:length(varg),
            mode = setfield(mode,lower(varg{k-1}),varg{k});
        end;
        if isfield(mode,'mode')
            Mode = mode.mode;
        end;
    end;
end;
if isempty(Mode) Mode = 'pearson'; end;

FLAG_WARNING = warning;     % save warning status
warning('off');

[r1,c1]=size(X);
if ~isempty(Y)
    [r2,c2]=size(Y);
    if r1~=r2,
        fprintf(2,'Error CORRCOEF: X and Y must have the same number of observations (rows).\n');
        warning(FLAG_WARNING);  % restore warning status
        return;
    end;
    NN = real(~isnan(X)')*real(~isnan(Y));
else
    [r2,c2]=size(X);
    NN = real(~isnan(X)')*real(~isnan(X));
end;

%%%%% generate combinations using indices for pairwise calculation of the correlation
YESNAN = any(isnan(X(:))) | any(isnan(Y(:)));
if YESNAN,
    FLAG_NANS_OCCURED=(1==1);
    if isfield(mode,'rows')
        if strcmp(mode.rows,'complete')
            % case-wise deletion: keep only the rows without any NaN
            ix = ~any(isnan([X,Y]),2);
            X = X(ix,:);
            if ~isempty(Y)
                Y = Y(ix,:);
            end;
            YESNAN = 0;
            NN = size(X,1);
        elseif strcmp(mode.rows,'all')
            fprintf(1,'Warning: data contains NaNs, rows=pairwise is used.');
            %%NN(NN < size(X,1)) = NaN;
        elseif strcmp(mode.rows,'pairwise')
            %%% default
        end;
    end;
end;
if isempty(Y),
    IX = ones(c1)-diag(ones(c1,1));
    [jx, jy ] = find(IX);
    [jxo,jyo] = find(IX);
    R = eye(c1);
else
    IX = sparse([],[],[],c1+c2,c1+c2,c1*c2);
    IX(1:c1,c1+(1:c2)) = 1;
    [jx,jy] = find(IX);

    IX = ones(c1,c2);
    [jxo,jyo] = find(IX);
    R = zeros(c1,c2);
end;

% The mode is matched on its leading characters, ignoring case; strncmpi is
% false when Mode is shorter than the pattern, so no padding of Mode is needed.
if strncmpi(Mode,'pearson',7);
    % see http://mathworld.wolfram.com/CorrelationCoefficient.html
    if ~YESNAN,
        [S,N,SSQ] = sumskipnan(X,1);
        if ~isempty(Y),
            [S2,N2,SSQ2] = sumskipnan(Y,1);
            CC = X'*Y;
            M1 = S./N;
            M2 = S2./N2;
            cc = CC./NN - M1'*M2;
            R  = cc./sqrt((SSQ./N-M1.*M1)'*(SSQ2./N2-M2.*M2));
        else
            CC = X'*X;
            M  = S./N;
            cc = CC./NN - M'*M;
            v  = SSQ./N - M.*M; %max(N-1,0);
            R  = cc./sqrt(v'*v);
        end;
    else
        if ~isempty(Y),
            X  = [X,Y];
        end;
        for k = 1:length(jx),
            %ik = ~any(isnan(X(:,[jx(k),jy(k)])),2);
            ik = ~isnan(X(:,jx(k))) & ~isnan(X(:,jy(k)));
            [s,n,s2] = sumskipnan(X(ik,[jx(k),jy(k)]),1);
            v  = (s2-s.*s./n)./n;
            cc = X(ik,jx(k))'*X(ik,jy(k));
            cc = cc/n(1) - prod(s./n);
            %r(k) = cc./sqrt(prod(v));
            R(jxo(k),jyo(k)) = cc./sqrt(prod(v));
        end;
    end

elseif strncmpi(Mode,'rank',4);
    % see [ 6] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html
    if ~YESNAN,
        if isempty(Y)
            R = corrcoef(ranks(X));
        else
            R = corrcoef(ranks(X),ranks(Y));
        end;
    else
        if ~isempty(Y),
            X = [X,Y];
        end;
        for k = 1:length(jx),
            %ik = ~any(isnan(X(:,[jx(k),jy(k)])),2);
            ik = ~isnan(X(:,jx(k))) & ~isnan(X(:,jy(k)));
            il = ranks(X(ik,[jx(k),jy(k)]));
            R(jxo(k),jyo(k)) = corrcoef(il(:,1),il(:,2));
        end;
        X = ranks(X);
    end;

elseif strncmpi(Mode,'spearman',8);
    % see [ 6] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html
    if ~isempty(Y),
        X = [X,Y];
    end;

    n = repmat(nan,c1,c2);

    if ~YESNAN,
        iy = ranks(X);  %  calculates ranks;

        for k = 1:length(jx),
            [R(jxo(k),jyo(k)),n(jxo(k),jyo(k))] = sumskipnan((iy(:,jx(k)) - iy(:,jy(k))).^2);   % NN is the number of non-missing values
        end;
    else
        for k = 1:length(jx),
            %ik = ~any(isnan(X(:,[jx(k),jy(k)])),2);
            ik = ~isnan(X(:,jx(k))) & ~isnan(X(:,jy(k)));
            il = ranks(X(ik,[jx(k),jy(k)]));
            % NN is the number of non-missing values
            [R(jxo(k),jyo(k)),n(jxo(k),jyo(k))] = sumskipnan((il(:,1) - il(:,2)).^2);
        end;
        X = ranks(X);
    end;
    R = 1 - 6 * R ./ (n.*(n.*n-1));
    if isempty(Y),
        % Only the off-diagonal pairs are computed above, n is NaN on the
        % diagonal; the correlation of a variable with itself is 1.
        R(1:c1+1:end) = 1;
    end;

elseif strncmpi(Mode,'partial',7);
    fprintf(2,'Error CORRCOEF: use PARTCORRCOEF \n');
    warning(FLAG_WARNING);  % restore warning status
    return;

elseif strncmpi(Mode,'kendall',7);
    fprintf(2,'Error CORRCOEF: mode ''%s'' not implemented yet.\n',Mode);
    warning(FLAG_WARNING);  % restore warning status
    return;
else
    fprintf(2,'Error CORRCOEF: unknown mode ''%s''\n',Mode);
end;

if (NARG<2),
    warning(FLAG_WARNING);  % restore warning status
    return;
end;


% CONFIDENCE INTERVAL
if isfield(mode,'alpha')
    alpha = mode.alpha;
elseif exist('flag_implicit_significance','file'),
    alpha = flag_implicit_significance;
else
    alpha = 0.01;
end;
% fprintf(1,'CORRCOEF: confidence interval is based on alpha=%f\n',alpha);


% SIGNIFICANCE TEST
R(isnan(R))=0;
tmp = 1 - R.*R;
tmp(tmp<0) = 0;     % prevent tmp<0 i.e. imag(t)~=0
t   = R.*sqrt(max(NN-2,0)./tmp);

if exist('t_cdf','file');
    sig = t_cdf(t,NN-2);
elseif exist('tcdf','file')>1;
    sig = tcdf(t,NN-2);
else
    fprintf('CORRCOEF: significance test not completed because of missing TCDF-function\n')
    sig = repmat(nan,size(R));
end;
sig  = 2 * min(sig,1 - sig);


if NARG<3,
    warning(FLAG_WARNING);  % restore warning status
    return;
end;


tmp = R;
%tmp(ix1 | ix2) = nan;      % avoid division-by-zero warning
z   = log((1+tmp)./(1-tmp))/2;      % Fisher transformation [21]
%sz = 1./sqrt(NN-3);        % standard error of z
sz  = sqrt(2)*erfinv(1-alpha)./sqrt(NN-3);  % confidence interval for alpha of z

ci1 = tanh(z-sz);
ci2 = tanh(z+sz);

%ci1(isnan(ci1))=R(isnan(ci1)); % in case of isnan(ci), the interval limits are exactly the R value
%ci2(isnan(ci2))=R(isnan(ci2));

if (NARG<5) || ~YESNAN,
    nan_sig = repmat(NaN,size(R));
    warning(FLAG_WARNING);  % restore warning status
    return;
end;

%%%%% ----- check independence of NaNs (missing values) -----
[nan_R, nan_sig] = corrcoef(X,double(isnan(X)));

% remove diagonal elements, because these have not any meaning %
nan_sig(isnan(nan_R)) = nan;
% remove diagonal elements, because these have not any meaning %
nan_R(isnan(nan_R)) = 0;

if 0, any(nan_sig(:) < alpha),
    tmp = nan_sig(:);           % Hack to skip NaN's in MIN(X)
    min_sig = min(tmp(~isnan(tmp)));    % Necessary, because Octave returns NaN rather than min(X) for min(NaN,X)
    fprintf(1,'CORRCOFF Warning: Missing Values (i.e. NaNs) are not independent of data (p-value=%f)\n', min_sig);
    fprintf(1,' Its recommended to remove all samples (i.e. rows) with any missing value (NaN).\n');
    fprintf(1,' The null-hypotheses (NaNs are uncorrelated) is rejected for the following parameter pair(s).\n');
    [ix,iy] = find(nan_sig < alpha);
    disp([ix,iy])
end;

%%%%% ----- end of independence check ------

warning(FLAG_WARNING);  % restore warning status
return;
NaN/inst/nanfilter.m0000664002356700235670000000413211601145313015052 0ustar schloeglschloeglfunction [Y,Z] = nanfilter(B,A,X,z); % NANFILTER is able to filter data with missing values encoded as NaN. % % [Y,Z] = nanfilter(B,A,X [, Z]); % % If X contains no missing data, NANFILTER should behave like FILTER. % NaN-values are handled gracefully. % % WARNING: missing values can introduce aliasing - causing unintended results. % Moreover, the behavior of bandpass and highpass filters in case of missing values % is not fully understood, and might contain some pitfalls. % % see also: FILTER, SUMSKIPNAN, NANFFT, NANCONV, NANFILTER1UC % $Id$ % Copyright (C) 2005,2011 by Alois Schloegl % This function is part of the NaN-toolbox available at % http://pub.ist.ac.at/~schloegl/matlab/NaN/ and % http://octave.svn.sourceforge.net/viewvc/octave/trunk/octave-forge/extra/NaN/inst/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA warning('NANFILTER is experimental. 
For more details see HELP NANFILTER'); na = length(A); nb = length(B); if any(size(X)==1) nc = 1; else nc = size(X,2); end; if nargin<4, [t,Z.S] = filter(B,A,zeros(na+nb,nc)); [t,Z.N] = filter(B,A,zeros(na+nb,nc)); elseif isnumeric(z), Z.S = z; [t, Z.N] = filter(B, A, zeros(na+nb,nc)); elseif isstruct(z), Z = z; end; NX = isnan(X); X(NX) = 0; [Y , Z.S] = filter(B, A, X, Z.S); [NY, Z.N] = filter(B, A, ~NX, Z.N); Y = (sum(B)/sum(A)) * Y./NY; NaN/inst/bland_altman.m0000664002356700235670000001005711622555413015520 0ustar schloeglschloegl
function RES = bland_altman(data,group,arg3)
% BLAND_ALTMAN shows the Bland-Altman plot of two columns of measurements
%   and computes several summary results.
%
%   bland_altman(m1, m2 [,group])
%   bland_altman(data [, group])
%   R = bland_altman(...)
%
%   m1,m2 are two columns with the same number of elements
%       containing the measurements. m1,m2 can be also combined
%       in a single two column data matrix.
%   group [optional] indicates which measurements belong to the same group
%       This is useful to account for repeated measurements.
%
%   R   struct with the summary results: the Spearman correlation of the
%       two measurements (corrcoef), the correlation of mean and difference
%       with its p-value (cc, p), and Bias and Var of the differences.
%       If group is given, no plot of the single measurements is drawn;
%       R then contains the per-group statistics instead (experimental).
%
% References:
% [1] JM Bland and DG Altman, Measuring agreement in method comparison studies.
%       Statistical Methods in Medical Research, 1999; 8; 135.
%       doi:10.1177/09622802990080204
% [2] P.S. Myles, Using the Bland– Altman method to measure agreement with repeated measures
%       British Journal of Anaesthesia 99(3):309–11 (2007)
%       doi:10.1093/bja/aem214

%    $Id$
%    Copyright (C) 2010,2011 by Alois Schloegl
%    This function is part of the NaN-toolbox
%    http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 3
% of the License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.

if nargin<2, group = []; end;
if nargin<3, arg3 = []; end;

if (size(data,2)==1)
    % calling convention bland_altman(m1, m2 [,group])
    data  = [data, group];
    group = arg3;
end;

D = data * [1;-1];      % difference of the two measurements
M = data * [1;1]/2;     % mean of the two measurements

RES.corrcoef = corrcoef(data(:,1),data(:,2),'spearman');
[RES.cc,RES.p] = corrcoef(M,D,'spearman');
if (RES.p<0.05)
    warning('A regression model according to section 3.2 [1] should be used');
    %% TODO: implement support for this type of data.
    RES.a = [ones(size(data,1),1),D]\M;
    RES.b = [ones(size(data,1),1),M]\D;
end;

if isempty(group)
    RES.Bias = mean(D,1);
    RES.Var  = var(D);

else
    %% TODO: this is not finished
    warning('analysis of data with repetitions is experimental - it might yield incorrect results - you are warned.!')
    [G,I,J] = unique (group);
    R = zeros(size(data));      % data with the group means removed
    m = repmat(NaN,length(G),1);
    n = repmat(NaN,length(G),1);
    d = repmat(NaN,length(G),1);
    data2 = repmat(NaN,length(G),size(data,2));     % group means
    SW2   = repmat(NaN,length(G),size(data,2));     % within-group variances
    for i = 1:length(G),
        ix = find(group==G(i));
        n(i) = length(ix);
        [R(ix,:), data2(i,:)] = center(data(ix,:),1);
        d(i) = mean(D(ix,:),1);
        m(i) = mean(M(ix,:),1);
        SW2(i,:)     = var(data(ix,:),[],1);
        RES.avg(i,:) = mean(data(ix,:),1);
    end;
    RES.SW2 = SW2;

    W = 1./n(J);
    RES.SSW = sumskipnan(R.^2,1,W);
    RES.SSB = var(data,[],1,W)*sum(W)*(sum(W)-1);
    RES.sigma2_w= RES.SSW/(sum(W)*(length(G)-1));
    RES.sigma2_u= RES.SSB/(sum(W)*(length(G)-1)) - RES.sigma2_w/(length(G));
    RES.group = bland_altman(data2);    % FIXME: this plot shows incorrect interval, it does not account for the group/repeated samples.
    RES.repeatability_coefficient1 = 2.77*sqrt(var(R,1,1));     % variance with factor group removed
    % uses the within-group variances computed in the loop above
    RES.repeatability_coefficient = 2.77*sqrt(mean(SW2,1));     % variance with factor group removed
    RES.std_d_ = std(d);
    RES.std_D_ = std(D);
    RES.std_m_ = std(m);

    RES.n = n;
    % TODO: a plot of the group-wise differences d against the group means m,
    % with a Bias weighted by n, is not implemented; no plot of the single
    % measurements is drawn for grouped data.
    return;
end;

plot(M,D,'o', [min(M),max(M)]', [0,0]','k--', [min(M),max(M)]', [1,1,1; 0,1.96,-1.96]'*[RES.Bias;std(D)]*[1,1], 'k-');
xlabel('mean');
ylabel('difference');
NaN/inst/mean.m0000664002356700235670000000652112404630545014024 0ustar schloeglschloeglfunction [y]=mean(x,DIM,opt,W) % MEAN calculates the mean of data elements. % % y = mean(x [,DIM] [,opt] [, W]) % % DIM dimension % 1 MEAN of columns % 2 MEAN of rows % N MEAN of N-th dimension % default or []: first DIMENSION, with more than 1 element % % opt options % 'A' arithmetic mean % 'G' geometric mean % 'H' harmonic mean % % W weights to compute weighted mean (default: []) % if W=[], all weights are 1. % number of elements in W must match size(x,DIM) % % usage: % mean(x) % mean(x,DIM) % mean(x,opt) % mean(x,opt,DIM) % mean(x,DIM,opt) % mean(x,DIM,W) % mean(x,DIM,opt,W); ' % % features: % - can deal with NaN's (missing values) % - weighting of data % - dimension argument also in Octave % - compatible to Matlab and Octave % % see also: SUMSKIPNAN, MEAN, GEOMEAN, HARMMEAN % % $Id: mean.m 12706 2014-09-12 17:46:13Z schloegl $ % Copyright (C) 2000-2004,2008,2009,2011 by Alois Schloegl % This is part of the NaN-toolbox. For more details see % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. 
% % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . if nargin==1, %------ case: mean(x) W = []; DIM=[]; opt='a'; elseif (nargin==2) W = []; %if ~isnumeric(DIM), %>=65;%abs('A'), if (DIM>64) %abs('A'), %------ case: mean(x,opt) opt=DIM; DIM=[]; elseif (DIM>length(size(x))) y=x; return; else %------ case: mean(x,DIM) opt='a'; end; elseif (nargin == 3), if isnumeric(DIM) && isnumeric(opt) %------ case: mean(x,DIM,W) W = opt; opt='a'; elseif (DIM>64) %abs('A'), %------ case: mean(x,opt,DIM) %if ~isnumeric(DIM), %>=65;%abs('A'), tmp=opt; opt=DIM; DIM=tmp; W = []; elseif (DIM>length(size(x))) y=x; return; else %------ case: mean(x,DIM,opt) W = []; end; elseif nargin==4, %------ case: mean(x,DIM,opt,W) ; else help mean % fprintf(1,'usage: mean(x) or mean(x,DIM) or mean(x,opt,DIM) or mean(x,DIM,opt) or mean(x,DIM,W) or mean(x,DIM,opt,W); ' end; if isempty(opt) opt = 'A'; elseif any(opt=='aAgGhH') opt = upper(opt); % eliminate old version else error('Error MEAN: invalid opt argument'); end; if (opt == 'A') [y, n] = sumskipnan(x,DIM,W); y = y./n; elseif (opt == 'G') [y, n] = sumskipnan(log(x),DIM,W); y = exp (y./n); elseif (opt == 'H') [y, n] = sumskipnan(1./x,DIM,W); y = n./y; else fprintf (2,'mean: option `%s` not recognized', opt); end %!assert(mean([1,NaN],1),[1,NaN]) %!assert(mean([1,NaN],2),1) %!assert(mean([+inf,-inf]),NaN) %!assert(mean([+0,-0],'h'),NaN) %!assert(mean([1,4,NaN],'g'),2) NaN/inst/nansum.m0000664002356700235670000000256311656313737014420 0ustar schloeglschloegl
function [o] = nansum(i,DIM)
% NANSUM same as SUM but ignores NaN's.
% NANSUM is OBSOLETE; use SUMSKIPNAN instead. NANSUM is included
%    to fix a bug in some other versions.
%
% Y = nansum(x [,DIM])
%
% DIM   dimension
%       1 sum of columns
%       2 sum of rows
%       default or []: first DIMENSION with more than 1 element
% Y     resulting sum
%
%
% see also: SUM, SUMSKIPNAN, NANSUM

%    $Id: nansum.m 9033 2011-11-08 20:58:07Z schloegl $
%    Copyright (C) 2000-2003,2008 by Alois Schloegl
%    This is part of the NaN-toolbox. For more details see
%    http://pub.ist.ac.at/~schloegl/matlab/NaN/
%
%    This program is free software; you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation; either version 3 of the License, or
%    (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program; If not, see http://www.gnu.org/licenses/.

% Hand over exactly the arguments that were supplied, so that SUMSKIPNAN
% selects the default dimension itself when DIM is omitted.
args = {i};
if nargin > 1
    args{2} = DIM;
end;
o = sumskipnan(args{:});

%!assert(nansum(NaN),0)
NaN/inst/meansq.m0000664002356700235670000000313711553522126014367 0ustar schloeglschloeglfunction o=meansq(x,DIM,W) % MEANSQ calculates the mean of the squares % % y = meansq(x,DIM,W) % % DIM dimension % 1 STD of columns % 2 STD of rows % N STD of N-th dimension % default or []: first DIMENSION, with more than 1 element % W weights to compute weighted mean (default: []) % if W=[], all weights are 1. % number of elements in W must match size(x,DIM) % % features: % - can deal with NaN's (missing values) % - weighting of data % - dimension argument also in Octave % - compatible to Matlab and Octave % % see also: SUMSQ, SUMSKIPNAN, MEAN, VAR, STD, RMS % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. 
% % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . % Copyright (C) 2000-2003,2009 by Alois Schloegl % $Id: meansq.m 8223 2011-04-20 09:16:06Z schloegl $ % This function is part of the NaN-toolbox for Octave and Matlab % http://pub.ist.ac.at/~schloegl/matlab/NaN/ if nargin<3, W = []; end; if nargin<2, [o,N,ssq] = sumskipnan(x,[],W); else [o,N,ssq] = sumskipnan(x,DIM,W); end; o = ssq./N; NaN/inst/medAbsDev.m0000664002356700235670000000303611520323414014724 0ustar schloeglschloeglfunction [D, M] = medAbsDev(X, DIM) % medAbsDev calculates the median absolute deviation % % Usage: D = medAbsDev(X, DIM) % or: [D, M] = medAbsDev(X, DIM) % Input: X : data % DIM: dimension along which mad should be calculated (1=columns, 2=rows) % (optional, default=first dimension with more than 1 element % Output: D : median absolute deviations % M : medians (optional) % Copyright (C) 2003 Patrick Houweling % Copyright (C) 2009 Alois Schloegl % $Id: medAbsDev.m 8075 2011-01-27 17:10:36Z schloegl $ % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software: you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation, either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program. If not, see . 
% input checks if any(size(X)==0), return; end; if nargin<2, M = median(X); else M = median(X, DIM); end; % median absolute deviation: median of absolute deviations to median D = median(abs(X - repmat(M, size(X)./size(M))), DIM);NaN/inst/tcdf.m0000664002356700235670000000342211656313737014032 0ustar schloeglschloeglfunction p = tcdf(x,n) % TCDF returns student cumulative distribtion function % % cdf = tcdf(x,DF); % % Computes the CDF of the students distribution % with DF degrees of freedom % x,DF must be matrices of same size, or any one can be a scalar. % % see also: NORMCDF, TPDF, TINV % Reference(s): % $Id: tcdf.m 9033 2011-11-08 20:58:07Z schloegl $ % Copyright (C) 2000-2003,2009 by Alois Schloegl % This is part of the NaN-toolbox. For more details see % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . 
% check size of arguments if all(size(x)==1) x = repmat(x,size(n)); elseif all(size(n)==1) n = repmat(n,size(x)); elseif all(size(x)==size(n)) ; %% OK, do nothing else error('size of input arguments must be equal or scalar') end; % allocate memory p = zeros(size(x)); p((x==Inf) & (n>0)) = 1; % workaround for invalid arguments in BETAINC ix = isnan(x) | ~(n>0); p(ix)= NaN; ix = (x > -Inf) & (x < Inf) & (n > 0); p(ix) = betainc (n(ix) ./ (n(ix) + x(ix).^2), n(ix)/2, 1/2) / 2; ix = find(x>0); p(ix) = 1 - p(ix); % shape output p = reshape(p,size(x)); %!assert(tcdf(NaN,4),NaN) NaN/inst/trimean.m0000664002356700235670000000465111714752314014547 0ustar schloeglschloeglfunction y=trimean(x,DIM) % TRIMEAN yields the weighted mean of the median and the quartiles % m = TRIMEAN(y). % % The trimean is m = (Q1+2*MED+Q3)/4 % with quartile Q1 and Q3 and median MED % % N-dimensional data is supported % % REFERENCES: % [1] http://mathworld.wolfram.com/Trimean.html % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . 
% $Id: trimean.m 9601 2012-02-09 14:14:36Z schloegl $ % Copyright (C) 1996-2003,2009,2010 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ global FLAG_NANS_OCCURED; % check dimension sz=size(x); % find the dimension if nargin==1, DIM = find(size(x)>1,1); if isempty(DIM), DIM=1; end; end; if DIM>length(sz), sz = [sz,ones(1,DIM-length(sz))]; end; D1 = prod(sz(1:DIM-1)); D2 = sz(DIM); D3 = prod(sz(DIM+1:length(sz))); D0 = [sz(1:DIM-1),1,sz(DIM+1:length(sz))]; y = repmat(nan,D0); q = repmat(nan,3,1); for k = 0:D1-1, for l = 0:D3-1, xi = k + l * D1*sz(DIM) + 1 ; xo = k + l * D1 + 1; t = x(xi+(0:sz(DIM)-1)*D1); t = sort(t(~isnan(t))); t = t(:); n = length(t); if (n. % $Id$ % Copyright (C) 2011 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ %%% TODO: implement as mex-function i = isnan(x); x(i) = 0; if nargin==2, x = cumsum(x,DIM); x(i) = NaN; elseif nargin==1, x = cumsum(x); x(i) = NaN; else help cumsumskipnan end; NaN/inst/nantest.m0000664002356700235670000002247412545323521014564 0ustar schloeglschloegl% NANTEST checks several mathematical operations and a few % statistical functions for their correctness related to NaN's. % e.g. it checks norminv, normcdf, normpdf, sort, matrix division and multiplication. % % % see also: NANINSTTEST % % REFERENCE(S): % [1] W. Kahan (1996) Lecture notes on the Status of "IEEE Standard 754 for % Binary Floating-point Arithmetic. % % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. 
% % You should have received a copy of the GNU General Public License % along with this program; If not, see . % $Id: nantest.m 12844 2015-07-02 21:04:17Z schloegl $ % Copyright (C) 2000-2004,2009 by Alois Schloegl % This script is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ %FLAG_WARNING = warning; %warning('off'); try x = randn([3,4,5]); x(~isnan(x)) = 0; catch fprintf(1,'WARNING: NANTEST fails for 3-DIM matrices. \n'); end; try [s,n] = sumskipnan([nan,1,4,5]); catch fprintf(1,'WARNING: SUMSKIPNAN is not avaible. \n'); end; % check NORMPDF, NORMCDF, NORMINV x = [-inf,-2,-1,-.5,0,.5,1,2,3,inf,nan]'; if exist('normpdf','file')==2, q(1) = sum(isnan(normpdf(x,2,0)))>sum(isnan(x)); if q(1), fprintf(1,'NORMPDF cannot handle v=0.\n'); fprintf(1,'-> NORMPDF should be replaced\n'); end; end; if exist('normcdf','file')==2, q(2) = sum(isnan(normcdf(x,2,0)))>sum(isnan(x)); if q(2), fprintf(1,'NORMCDF cannot handle v=0.\n'); fprintf(1,'-> NORMCDF should be replaced\n'); end; end; if ~(any(exist('erfinv') == [2,5])) fprintf(1,'ERFINV is not available\n'); elseif exist('norminv','file')==2, p = [-inf,-.2,0,.2,.5,1,2,inf,nan]; q(3) = sum(~isnan(norminv(p,2,0)))<4; if q(3), fprintf(1,'NORMINV cannot handle correctly v=0.\n'); fprintf(1,'-> NORMINV should be replaced\n'); end; q(4) = ~isnan(norminv(0,NaN,0)); q(5) = any(norminv(0.5,[1 2 3],0)~=(1:3)); end; if exist('tpdf','file')==2, q(6) = ~isnan(tpdf(nan,4)); if q(6), fprintf(1,'TPDF(NaN,4) does not return NaN\n'); fprintf(1,'-> TPDF should be replaced\n'); end; end; if exist('tcdf','file')==2, try q(7) = ~isnan(tcdf(nan,4)); catch q(7) = 1; end; if q(7), fprintf(1,'TCDF(NaN,4) does not return NaN\n'); fprintf(1,'-> TCDF should be replaced\n'); end; end; if exist('tinv','file')==2, try q(8) = ~isnan(tinv(nan,4)); catch q(8) = 1; end; if q(8), fprintf(1,'TINV(NaN,4) does not return NaN\n'); fprintf(1,'-> TINV should be replaced\n'); end; end; q(9) = isreal(double(2+3i)); if q(9) printf('DOUBLE rejects 
imaginary part\n-> this can affect SUMSKIPNAN\n'); end; try x = reshape(1:6,3,2); [cc,nn] = covm(x+i*x,'e'); q(10) = 0; catch q(10) = 1; end; if 0, %%%%% MOD if exist('mod')>1, if (mod(5,0))~=0, fprintf(1,'WARNING: MOD(x,0) does not return 0.\n'); end; if isnan(mod(5,0)), fprintf(1,'WARNING: MOD(x,0) returns NaN.\n'); end; if isnan(mod(5,inf)), fprintf(1,'WARNING: MOD(x,INF) returns NaN.\n'); end; end; %%%%% REM if exist('rem')>1, if (rem(5,0))~=0, fprintf(1,'WARNING: REM(x,0) does not return 0.\n'); end; if isnan(rem(5,0)), fprintf(1,'WARNING: REM(x,0) returns NaN.\n'); end; if isnan(mod(5,inf)), fprintf(1,'WARNING: REM(x,INF) returns NaN.\n'); end; end; end; %%%%% NANSUM(NAN) - this test addresses a problem in Matlab 5.3, 6.1 & 6.5 if exist('nansum','file'), if isnan(nansum(nan)), fprintf(1,'Warning: NANSUM(NaN) returns NaN instead of 0\n'); fprintf(1,'-> NANSUM should be replaced\n'); end; end; %%%%% NANSUM(NAN) - this test addresses a problem in Matlab 5.3, 6.1 & 6.5 if exist('nanstd','file'), if ~isnan(nanstd(0)), fprintf(1,'Warning: NANSTD(x) with isscalar(x) returns 0 instead of NaN\n'); fprintf(1,'-> NANSTD should be replaced\n'); end; end; %%%%% GEOMEAN - this test addresses a problem in Octave if exist('geomean','file'), if isnan(geomean((0:3)')), fprintf(1,'Warning: GEOMEAN([0,1,2,3]) NaN instead of 0\n'); fprintf(1,'-> GEOMEAN should be replaced\n'); end; end; %%%%% HARMMEAN - this test addresses a problem in Octave if exist('harmmean','file'), if isnan(harmmean(0:3)), fprintf(1,'Warning: HARMMEAN([0,1,2,3]) NaN instead of 0\n'); fprintf(1,'-> HARMMEAN should be replaced\n'); end; end; %%%%% BITAND - this test addresses a problem in Octave if exist('bitand')>1, if isnan(bitand(2^33-1,13)), fprintf(1,'BITAND can return NaN. 
\n'); end; end; %%%%% BITSHIFT - this test addresses a problem in Octave if exist('bitshift','file'), if isnan(bitshift(5,30,32)), fprintf(1,'BITSHIFT can return NaN.\n'); end; end; %%%%% ALL - this test addresses a problem in some old Octave and FreeMat v3.5 if any(NaN)==1, fprintf(1,'WARNING: ANY(NaN) returns 1 instead of 0\n'); end; if any([])==1, fprintf(1,'WARNING: ANY([]) returns 1 instead of 0\n'); end; %%%%% ALL - this test addresses a problem in some old Octave and FreeMat v3.5 if all(NaN)==0, fprintf(1,'WARNING: ALL(NaN) returns 0 instead of 1\n'); end; if all([])==0, fprintf(1,'WARNING: ALL([]) returns 0 instead of 1\n'); end; %%%%% SORT - this was once a problem in Octave Version < 2.1.36, and still is in FreeMat 4.0 %%%% if ~all(isnan(sort([3,4,NaN,3,4,NaN]))==[0,0,0,0,1,1]), warning('Warning: SORT does not properly handle NaN.'); end; %%%%% commutativity of 0*NaN %%% This test adresses a problem in Octave x=[-2:2;4:8]'; y=x;y(2,1)=nan;y(4,2)=nan; B=[1,0,2;0,3,1]; if ~all(all(isnan(y*B)==isnan(B'*y')')), fprintf(2,'WARNING: 0*NaN within matrix multiplication is not commutative\n'); end; % from Kahan (1996) tmp = (0-3*i)/inf; if isnan(tmp) fprintf(2,'WARNING: (0-3*i)/inf results in NaN instead of 0.\n'); end; %(roots([5,0,0])-[0;0]) %(roots([2,-10,12])-[3;2]) %(roots([2e-37,-2,2])-[1e37;1]) %%%%% check nan/nan %% this test addresses a problem in Matlab 5.3, 6.1 & 6.5 p = 4; tmp1 = repmat(nan, 4); tmp2 = repmat(nan, 4); if ispc % Octave 4.0.0 on Windows crashes, therefore the test is disabled warning('mrdivide (repmat(nan,4), repmat(nan,4)) and mldivide (repmat(nan,4), repmat(nan,4)) not tested because it might crash Octave on Windows.\n'); else try tmp1 = repmat(nan,p) / repmat(nan,p); catch % exception error in Octave 3.8.2 and later of debian wheezy fprintf(2,'mrdivide (repmat(nan,4), repmat(nan,4)) fails with an exception\n'); end; try tmp2 = repmat(nan,p) \ repmat(nan,p); catch % exception error in Octave 3.8.2 and later of debian wheezy 
fprintf(2,'mldivide (repmat(nan,4), repmat(nan,4)) fails with an exception\n'); end end; tmp3 = repmat(0,p)/repmat(0,p); tmp4 = repmat(0,p)\repmat(0,p); tmp5 = repmat(0,p)*repmat(inf,p); tmp6 = repmat(inf,p)*repmat(0,p); x = randn(100,1)*ones(1,p); y=x'*x; tmp7 = y/y; tmp8 = y\y; if ~all(isnan(tmp1(:))), fprintf(1,'WARNING: matrix division NaN/NaN does not result in NaN\n'); end; if ~all(isnan(tmp2(:))), fprintf(1,'WARNING: matrix division NaN\\NaN does not result in NaN\n'); end; if ~all(isnan(tmp3(:))), fprintf(2,'WARNING: matrix division 0/0 does not result in NaN\n'); end; if ~all(isnan(tmp4(:))), fprintf(2,'WARNING: matrix division 0\\0 does not result in NaN\n'); end; if ~all(isnan(tmp5(:))), fprintf(2,'WARNING: matrix multiplication 0*inf does not result in NaN\n'); end; if ~all(isnan(tmp6(:))), fprintf(2,'WARNING: matrix multiplication inf*0 does not result in NaN\n'); end; if any(any(tmp7==inf)); fprintf(2,'WARNING: right division of two singulare matrices return INF\n'); end; if any(any(tmp8==inf)); fprintf(2,'WARNING: left division of two singulare matrices return INF\n'); end; tmp = [tmp1;tmp2;tmp3;tmp4;tmp5;tmp6;tmp7;tmp8]; %warning(FLAG_WARNING); %%%%% QUANTILE TEST d = [1 1 2 2 4 4 10 700]'; q = [-1,0,.05,.1,.25,.49,.5,.51,.75,.8, .999999,1,2]; r = [ NaN, 1, 1, 1, 1.5, 2, 3, 4, 7, 10, 700, 700, NaN]; if any( quantile(d, q)' - r>0) fprintf(1,'Quantile(1): failed\n'); else fprintf(1,'Quantile(1): OK\n'); end; if exist('histo3','file') H = histo3(d); else H.X = [1;2;4;10;700]; H.H = [2;2;2;1;1]; H.datatype = 'HISTOGRAM'; end; if any( quantile(H, q)' - r>0) fprintf(1,'Quantile(2): failed\n'); else fprintf(1,'Quantile(2): OK\n'); end; NaN/inst/gscatter.m0000664002356700235670000000435411601145313014712 0ustar schloeglschloeglfunction [h] = gscatter(x,y,group,clr,sym,siz,doleg,xname,yname) % GSCATTER scatter plot of groups % % gscatter(x,y,group) % gscatter(x,y,group,clr,sym,siz) % gscatter(x,y,group,clr,sym,siz,doleg) % 
gscatter(x,y,group,clr,sym,siz,doleg,xname,yname) % h = gscatter(...) % % x,y, group: vectors with equal length % clf: color vector, default 'bgrcmyk' % sym: symbol, default '.' % siz: size of Marker % doleg: 'on' (default) shows legend, 'off' turns of legend % xname, yname: name of axis % % % see also: ecdf, cdfplot % % References: % $Id$ % Copyright (C) 2009 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or % modify it under the terms of the GNU General Public License % as published by the Free Software Foundation; either version 3 % of the License, or (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
[b,i,j] = unique(group); if nargin<3 help gscatter; error('invalid number of arguments;') end; if nargin<4 clr = []; end if nargin<5 sym = []; end if nargin<6 siz = []; end if nargin<7 doleg = []; end if nargin<8 xname = []; end if nargin<9 yname = []; end; if isempty(clr), clr='bgrcmyk'; end; if isempty(sym), sym='.'; end; if isempty(doleg), doleg='on'; end; for k=1:length(b); %ix = find(k==j); c = clr(mod(k-1,length(clr))+1); s = sym(mod(k-1,length(sym))+1); hh(k) = plot(x(k==j),y(k==j),[c,s]); if ~isempty(siz) z = siz(mod(k-1,length(siz))+1); set(hh(k),'MarkerSize',z); end hold on; end; hold off; if ~strcmpi(doleg,'off') if isnumeric(b) b=num2str(b(:)); end; legend(b); end; if ~isempty(xname) xlabel(xname); end; if ~isempty(yname) ylabel(yname); end; if nargout>0, h = hh; end; NaN/inst/classify.m0000664002356700235670000000462211601145313014711 0ustar schloeglschloeglfunction [CLASS,ERR,POSTERIOR,LOGP,COEF]=classify(sample,training,classlabel,TYPE) % CLASSIFY classifies sample data into categories % defined by the training data and its group information % % CLASS = classify(sample, training, group) % CLASS = classify(sample, training, group, TYPE) % [CLASS,ERR,POSTERIOR,LOGP,COEF] = CLASSIFY(...) % % CLASS contains the assigned group. % ERR is the classification error on the training set weighted by the % prior propability of each group. % % The same classifier as in TRAIN_SC are supported. % % ATTENTION: no cross-validation is applied, therefore the % classification error is too optimistic (overfitting). % Use XVAL instead to obtain cross-validated performance. % % see also: TRAIN_SC, TEST_SC, XVAL % % References: % [1] R. Duda, P. Hart, and D. Stork, Pattern Classification, second ed. % John Wiley & Sons, 2001. 
% $Id$ % Copyright (C) 2008,2009 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or % modify it under the terms of the GNU General Public License % as published by the Free Software Foundation; either version 3 % of the License, or (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. if nargin<4 TYPE = 'linear'; end; if strcmp(TYPE,'linear') TYPE = 'LDA'; elseif strcmp(TYPE,'quadratic') TYPE = 'QDA2'; % result is closer to Matlab elseif strcmp(TYPE,'diagLinear') TYPE = 'NBC'; elseif strcmp(TYPE,'diagQuadratic') TYPE = 'NBC'; elseif strcmp(TYPE,'mahalanobis') TYPE = 'MDA'; end; [group,I,classlabel] = unique(classlabel); CC = train_sc(training,classlabel,TYPE); R = test_sc(CC,sample); CLASS = group(R.classlabel); if nargout>1, R = test_sc(CC,training,[],classlabel); ERR = 1-R.ACC; end; if nargout>2, warning('output arguments POSTERIOR,LOGP and COEF not supported') POSTERIOR = []; LOGP = []; COEF = []; end; NaN/inst/center.m0000664002356700235670000000342211553522126014360 0ustar schloeglschloeglfunction [i,S] = center(i,DIM,W) % CENTER removes the mean % % [z,mu] = center(x,DIM,W) % removes mean x along dimension DIM % % x input data % DIM dimension % 1: column % 2: row % default or []: first DIMENSION, with more than 1 element % W weights to computed weighted mean (default: [], all weights = 1) % numel(W) must be equal to size(x,DIM) % % features: % - can deal with NaN's (missing values) % - weighting of data % - 
dimension argument % - compatible to Matlab and Octave % % see also: SUMSKIPNAN, MEAN, STD, DETREND, ZSCORE % % REFERENCE(S): % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . % $Id: center.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2000-2003,2005,2009 by Alois Schloegl % This is part of the NaN-toolbox. For more details see % http://pub.ist.ac.at/~schloegl/matlab/NaN/ if any(size(i)==0); return; end; if nargin<3, W = []; end; if nargin>1, [S,N] = sumskipnan(i,DIM,W); else [S,N] = sumskipnan(i,[],W); end; S = S./N; szi = size(i); szs = size(S); if length(szs) % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or % modify it under the terms of the GNU General Public License % as published by the Free Software Foundation; either version 3 % of the License, or (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
if (nargin<3) || isempty(MODE), MODE = 'LDA'; end; if ischar(MODE) tmp = MODE; clear MODE; MODE.TYPE = tmp; elseif ~isfield(MODE,'TYPE') MODE.TYPE=''; end; sz = size(D); NG = []; W = []; if iscell(classlabel) [b,i,C] = unique(classlabel{:,1}); if size(classlabel,2)>1, W = [classlabel{:,2}]; end; if size(classlabel,2)>2, [Label,tmp1,NG] = unique(classlabel{:,3}); end; elseif size(classlabel,2)>1, %% group-wise classvalidation C = classlabel(:,1); W = classlabel(:,2); if size(classlabel,2)==2, warning('This option defines W and NG in an ambigous way - use instead xval(D,{C,[],NG},...) or xval(D,{C,W},...)'); else [Label,tmp1,NG] = unique(classlabel(:,3)); end; else C = classlabel; end; if all(W==1), W = []; end; if sz(1)~=size(C,1), error('length of data and classlabel does not fit'); end; % use only valid samples ix0 = find(~any(isnan(C),2)); if isempty(NG) if (nargin<4) || strcmpi(arg4,'LOOM') %% LOOM NG = (1:sz(1))'; elseif isnumeric(arg4) if isscalar(arg4) % K-fold XV NG = ceil((1:length(C))'*arg4/length(C)); elseif length(arg4)==2, NG = ceil((1:length(C))'*arg4(1)/length(C)); end; end; end; sz = size(D); if sz(1)~=length(C), error('length of data and classlabel does not fit'); end; if ~isfield(MODE,'hyperparameter') MODE.hyperparameter = []; end cl = repmat(NaN,size(classlabel,1),1); for k = 1:max(NG), ix = ix0(NG(ix0)~=k); if isempty(W), CC = train_sc(D(ix,:), C(ix), MODE); else CC = train_sc(D(ix,:), C(ix), MODE, W(ix)); end; ix = ix0(NG(ix0)==k); r = test_sc(CC, D(ix,:)); cl(ix,1) = r.classlabel; end; %R = kappa(C,cl,'notIgnoreNAN',W); R = kappa(C,cl,[],W); %R2 = kappa(R.H); R.ERR = 1-R.ACC; if isnumeric(R.Label) R.Label = cellstr(int2str(R.Label)); end; if nargout>1, % final classifier if isempty(W), CC = train_sc(D,C,MODE); else CC = train_sc(D,C,MODE,W); end; CC.Labels = 1:max(C); %CC.Labels = unique(C); end; NaN/inst/mad.m0000664002356700235670000000430311553522126013640 0ustar schloeglschloeglfunction R = mad(i,DIM) % MAD estimates the Mean Absolute 
deviation % (note that according to [1,2] this is the mean deviation; % not the mean absolute deviation) % % y = mad(x,DIM) % calculates the mean deviation of x in dimension DIM % % DIM dimension % 1: STATS of columns % 2: STATS of rows % default or []: first DIMENSION, with more than 1 element % % features: % - can deal with NaN's (missing values) % - dimension argument % - compatible to Matlab and Octave % % see also: SUMSKIPNAN, VAR, STD, % % REFERENCE(S): % [1] http://mathworld.wolfram.com/MeanDeviation.html % [2] L. Sachs, "Applied Statistics: A Handbook of Techniques", Springer-Verlag, 1984, page 253. % % [3] http://mathworld.wolfram.com/MeanAbsoluteDeviation.html % [4] Kenney, J. F. and Keeping, E. S. "Mean Absolute Deviation." §6.4 in Mathematics of Statistics, Pt. 1, 3rd ed. Princeton, NJ: Van Nostrand, pp. 76-77 1962. % $Id: mad.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2000-2002,2010 by Alois Schloegl % This is part of the NaN-toolbox. For more details see % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . 
if nargin==1, DIM = find(size(i)>1,1); if isempty(DIM), DIM=1; end; end; [S,N] = sumskipnan(i,DIM); % sum i = i - repmat(S./N,size(i)./size(S)); % remove mean [S,N] = sumskipnan(abs(i),DIM); % %if flag_implicit_unbiased_estim; %% ------- unbiased estimates ----------- n1 = max(N-1,0); % in case of n=0 and n=1, the (biased) variance, STD and STE are INF %else % n1 = N; %end; R = S./n1; NaN/inst/std.m0000664002356700235670000000723311553522126013676 0ustar schloeglschloeglfunction [o,v]=std(x,opt,DIM,W) % STD calculates the standard deviation. % % [y,v] = std(x [, opt[, DIM [, W]]]) % % opt option % 0: normalizes with N-1 [default] % provides the square root of best unbiased estimator of the variance % 1: normalizes with N, % this provides the square root of the second moment around the mean % otherwise: % best unbiased estimator of the standard deviation (see [1]) % % DIM dimension % N STD of N-th dimension % default or []: first DIMENSION, with more than 1 element % W weights to compute weighted s.d. (default: []) % if W=[], all weights are 1. % number of elements in W must match size(x,DIM) % % y estimated standard deviation % % features: % - provides an unbiased estimation of the S.D. % - can deal with NaN's (missing values) % - weighting of data % - dimension argument also in Octave % - compatible to Matlab and Octave % % see also: RMS, SUMSKIPNAN, MEAN, VAR, MEANSQ, % % % References(s): % [1] http://mathworld.wolfram.com/StandardDeviationDistribution.html % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. 
% % You should have received a copy of the GNU General Public License % along with this program; If not, see . % $Id: std.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2000-2003,2006,2009,2010 by Alois Schloegl % This is part of the NaN-toolbox for Octave and Matlab % http://pub.ist.ac.at/~schloegl/matlab/NaN/ if nargin<4, W = []; end; if nargin<3, DIM = []; end; if isempty(DIM), DIM = find(size(x)>1,1); if isempty(DIM), DIM=1; end; end; [y,n,ssq] = sumskipnan(x,DIM,W); if all(ssq(:).*n(:) > 2*(y(:).^2)) %% rounding error is neglectable y = ssq - y.*y./n; else %% rounding error is not neglectable szx = size(x); szy = size(y); if length(szy)1, v = y.*((max(n-1,0)./(n.*n))-1./(n.*ib.*ib)); % variance of the estimated S.D. ??? needs further checks end; NaN/inst/percentile.m0000664002356700235670000000313411553522126015232 0ustar schloeglschloeglfunction Q=percentile(Y,q,DIM) % PERCENTILE calculates the percentiles of histograms and sample arrays. % % Q = percentile(Y,q) % Q = percentile(Y,q,DIM) % returns the q-th percentile along dimension DIM of sample array Y. % size(Q) is equal size(Y) except for dimension DIM which is size(Q,DIM)=length(Q) % % Q = percentile(HIS,q) % returns the q-th percentile from the histogram HIS. % HIS must be a HISTOGRAM struct as defined in HISTO2 or HISTO3. % If q is a vector, the each row of Q returns the q(i)-th percentile % % see also: HISTO2, HISTO3, QUANTILE % $Id: percentile.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 1996-2003,2005,2006,2007 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. 
% % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . if nargin==2, Q = quantile(Y,q/100); elseif nargin==3, Q = quantile(Y,q/100,DIM); else help percentile end; NaN/inst/ecdf.m0000664002356700235670000000443011553522126014001 0ustar schloeglschloeglfunction [F,X]=ecdf(h,Y) % ECDF empirical cumulative function % NaN's are considered Missing values and are ignored. % % [F,X] = ecdf(Y) % calculates empirical cumulative distribution functions (i.e Kaplan-Meier estimate) % ecdf(Y) % ecdf(gca,Y) % without output arguments plots the empirical cdf, in axis gca. % % Y input data % must be a vector or matrix, in case Y is a matrix, the ecdf for every column is computed. % % see also: HISTO2, HISTO3, PERCENTILE, QUANTILE % $Id: ecdf.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2009,2010 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . 
if ~isscalar(h) || ~ishandle(h) || isstruct(h), Y = h; h = []; end; DIM = []; SW = isstruct(Y); if SW, SW = isfield(Y,'datatype'); end; if SW, SW = strcmp(Y.datatype,'HISTOGRAM'); end; if SW, [yr,yc]=size(Y.H); if ~isfield(Y,'N'); Y.N = sum(Y.H,1); end; f = [zeros(1,yc);cumsum(Y.H,1)]; for k=1:yc, f(:,k)=f(:,k)/Y.N(k); end; t = [Y.X(1,:);Y.X]; elseif isnumeric(Y), sz = size(Y); if isempty(DIM), DIM = min(find(sz>1)); if isempty(DIM), DIM = 1; end; end; if DIM==2, Y=Y.'; DIM = 1; end; t = sort(Y,1); t = [t(1,:);t]; N = sum(~isnan(Y),1); f = zeros(size(Y,1)+1,size(Y,2)); for k=1:size(Y,2), f(:,k)=[0:size(Y,1)]'/N(k); end; end; if nargout<1, if ~isempty(h), axes(h); end; stairs(t,f); else F = f; X = t; end; NaN/inst/harmmean.m0000664002356700235670000000333311553522126014671 0ustar schloeglschloeglfunction [y] = harmmean(x,DIM,W) % HARMMEAN calculates the harmonic mean of data elements. % The harmonic mean is the inverse of the mean of the inverse elements. % % y = harmmean(x [,DIM [,W]]) is the same as % y = mean(x,'H' [,DIM [,W]]) % % DIM dimension % 1 STD of columns % 2 STD of rows % default or []: first DIMENSION, with more than 1 element % W weights to compute weighted mean (default: []) % if W=[], all weights are 1. % number of elements in W must match size(x,DIM) % % features: % - can deal with NaN's (missing values) % - weighting of data % - dimension argument also in Octave % - compatible to Matlab and Octave % % see also: SUMSKIPNAN, MEAN, GEOMEAN % % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. 
% % You should have received a copy of the GNU General Public License % along with this program; If not, see . % $Id: harmmean.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2000-2002,2009 by Alois Schloegl % This is part of the NaN-toolbox. For more details see % http://pub.ist.ac.at/~schloegl/matlab/NaN/ if nargin<2 DIM=min(find(size(x)>1)); if isempty(DIM), DIM=1; end; end; if nargin<3 W = []; end; [y, n] = sumskipnan(1./x,DIM,W); y = n./y; NaN/inst/flag_implicit_skip_nan.m0000664002356700235670000000506311601145313017561 0ustar schloeglschloeglfunction FLAG = flag_implicit_skip_nan(i) % FLAG_IMPLICIT_SKIP_NAN sets and gets default mode for handling NaNs % 1 skips NaN's (the default mode if no mode is set) % 0 NaNs are propagated; input NaN's give NaN's at the output % % FLAG = flag_implicit_skip_nan() % gets current mode % % flag_implicit_skip_nan(FLAG) % sets mode % % prevFLAG = flag_implicit_skip_nan(nextFLAG) % gets previous set FLAG and sets FLAG for the future % flag_implicit_skip_nan(prevFLAG) % resets FLAG to previous mode % % It is used in: % SUMSKIPNAN, MEDIAN, QUANTILES, TRIMEAN % and affects many other functions like: % CENTER, KURTOSIS, MAD, MEAN, MOMENT, RMS, SEM, SKEWNESS, % STATISTIC, STD, VAR, ZSCORE etc. % % The mode is stored in the global variable FLAG_implicit_skip_nan % It is recommended to use flag_implicit_skip_nan(1) as default and % flag_implicit_skip_nan(0) should be used for exceptional cases only. % This feature might disappear without further notice, so you should really not % rely on it. % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA % $Id: flag_implicit_skip_nan.m 8351 2011-06-24 17:35:07Z carandraug $ % Copyright (C) 2001-2003,2009 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ persistent FLAG_implicit_skip_nan; %% if strcmp(version,'3.6'), FLAG_implicit_skip_nan=(1==1); end; %% hack for the use with Freemat3.6 %%% set DEFAULT value of FLAG if isempty(FLAG_implicit_skip_nan), FLAG_implicit_skip_nan = (1==1); %logical(1); % logical.m not available on 2.0.16 end; FLAG = FLAG_implicit_skip_nan; if nargin>0, FLAG_implicit_skip_nan = (i~=0); %logical(i); %logical.m not available in 2.0.16 if (~i) warning('flag_implicit_skipnan(0): You are warned!!! You have turned off skipping NaN in sumskipnan. This is not recommended. Make sure you really know what you do.') end; end; NaN/inst/train_lda_sparse.m0000664002356700235670000001145711601145313016412 0ustar schloeglschloeglfunction [CC] = train_lda_sparse(X,G,par,tol) % Linear Discriminant Analysis for the Small Sample Size Problem as described in % Algorithm 1 of J. Duintjer Tebbens, P. Schlesinger: 'Improving % Implementation of Linear Discriminant Analysis for the High Dimension/Small Sample Size % Problem', Computational Statistics and Data Analysis, vol. 52, no. 1, pp. 423-437, 2007. % Input: % X ...... (sparse) training data matrix % G ...... group coding matrix of the training data % test ...... (sparse) test data matrix % Gtest ...... group coding matrix of the test data % par ...... if par = 0 then classification exploits sparsity too % tol ...... tolerance to distinguish zero eigenvalues % Output: % err ...... Wrong classification rate (in %) % trafo ...... LDA transformation vectors % % Reference(s): % J. 
Duintjer Tebbens, P. Schlesinger: 'Improving % Implementation of Linear Discriminant Analysis for the High Dimension/Small Sample Size % Problem', Computational Statistics and Data Analysis, vol. 52, no. 1, % pp. 423-437, 2007. % % Copyright (C) by J. Duintjer Tebbens, Institute of Computer Science of the Academy of Sciences of the Czech Republic, % Pod Vodarenskou vezi 2, 182 07 Praha 8 Liben, 18.July.2006. % This work was supported by the Program Information Society under project % 1ET400300415. % % % Modified for the use with Matlab6.5 by A. Schloegl, 22.Aug.2006 % % $Id$ % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or % modify it under the terms of the GNU General Public License % as published by the Free Software Foundation; either version 3 % of the License, or (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Step (1) %p = length(X(1,:));n = length(X(:,1));g = length(G(1,:)); G = sparse(G); [n,p]=size(X); g = size(G,2); for j=1:g nj(j) = norm(G(:,j))^2; end Dtild = spdiags(nj'.^(-1),0,g,g); Xtild = X*X'; Xtild1 = Xtild*ones(n,1); help = ones(n,1)*Xtild1'/n - (ones(1,n)*Xtild'*ones(n,1))/(n^2); matrix = Xtild - Xtild1*ones(1,n)/n - help; % eliminate non-symmetry of matrix due to rounding error: matrix = (matrix+matrix')/2; [V0,S] = eig(matrix); % [s,I] = sort(diag(S),'descend'); [s,I] = sort(-diag(S)); s = -s; cc = sum(s 0 [Q,R] = qr(V2,0); matrix = B1*Dhalf*Q; [V0,S] = eig(matrix'*matrix); %[s,I] = sort(diag(S),'descend'); [s,I] = sort(-diag(S)); s = -s; for j=1:cc C(:,j) = Q*V0(:,I(j)); end end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Step (5) C1 = help2*Dhalf*C; trafo(:,1:g-1) = X'*C1 - (X'*ones(n,1))*(ones(1,n)*C1/n); for j=1:g-1 trafo(:,j) = trafo(:,j)/norm(trafo(:,j)); end CC.trafo = trafo; if par == 0 % X2 = full(test*X'); % [pred] = classifs(C1,M1,X2); CC.C1 = C1; CC.M1 = M1; CC.X = X; else % M = Dtild*G'*X; % [pred] = classifs(trafo,M,test); CC.C1 = trafo; CC.M1 = Dtild*G'*X; end NaN/inst/flag_implicit_significance.m0000664002356700235670000000467011553522126020413 0ustar schloeglschloeglfunction alpha=flag_implicit_significance(i) % The use of FLAG_IMPLICIT_SIGNIFICANCE is in experimental state. % flag_implicit_significance might even become obsolete. % % FLAG_IMPLICIT_SIGNIFICANCE sets and gets default alpha (level) of any significance test % The default alpha-level is stored in the global variable FLAG_implicit_significance % The idea is that the significance must not be assigned explicitely. % This might yield more readable code. % % Choose alpha low enough, because in alpha*100% of the cases, you will % reject the Null hypothesis just by change. For this reason, the default % alpha is 0.01. 
% % flag_implicit_significance(0.01) % sets the alpha-level for the significance test % % alpha = flag_implicit_significance() % gets default alpha % % flag_implicit_significance(alpha) % sets default alpha-level % % alpha = flag_implicit_significance(alpha) % gets and sets alpha % % features: % - compatible to Matlab and Octave % % see also: CORRCOEF, PARTCORRCOEF % $Id: flag_implicit_significance.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2000-2002,2009,2010 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . persistent FLAG_implicit_significance; DEFAULT_ALPHA = 0.01; %%% check whether FLAG was already defined if ~exist('FLAG_implicit_significance','var'), FLAG_implicit_significance = DEFAULT_ALPHA; % default value end; if isempty(FLAG_implicit_significance), FLAG_implicit_significance = DEFAULT_ALPHA; % default value end; if nargin>0, fprintf(2,'Warning: flag_implicit_significance is in an experimental state\n'); fprintf(2,'It might become obsolete.\n'); FLAG_implicit_significance = i; end; alpha = FLAG_implicit_significance; NaN/inst/statistic.m0000664002356700235670000001275211553522126015115 0ustar schloeglschloeglfunction [varargout]=statistic(i,DIM,fun) % STATISTIC estimates various statistics at once. 
% % R = STATISTIC(x,DIM) % calculates all statistic (see list of fun) in dimension DIM % R is a struct with all statistics % % y = STATISTIC(x,fun) % estimate of fun on dimension DIM % y gives the statistic of fun % % DIM dimension % 1: STATS of columns % 2: STATS of rows % N: STATS of N-th dimension % default or []: first DIMENSION, with more than 1 element % % fun 'mean' mean % 'std' standard deviation % 'var' variance % 'sem' standard error of the mean % 'rms' root mean square % 'meansq' mean of squares % 'sum' sum % 'sumsq' sum of squares % 'CM#' central moment of order # % 'skewness' skewness % 'kurtosis' excess coefficient (Fisher kurtosis) % 'mad' mean absolute deviation % % features: % - can deal with NaN's (missing values) % - dimension argument % - compatible to Matlab and Octave % % see also: SUMSKIPNAN % % REFERENCE(S): % [1] http://www.itl.nist.gov/ % [2] http://mathworld.wolfram.com/ % $Id: statistic.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2000-2003,2010 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . 
% ---- parse input arguments ---------------------------------------------
% accepted call forms: statistic(x), statistic(x,DIM), statistic(x,fun),
% statistic(x,DIM,fun); fun is a name or a cell array of names
if nargin==1,
        DIM = [];
        fun = [];
elseif nargin==2,
        if ~isnumeric(DIM),
                % second argument is a statistic name, not a dimension
                fun = DIM;
                DIM = [];
        else
                fun = [];
        end
end
if isempty(DIM),
        % default: first dimension with more than one element
        DIM = find(size(i)>1,1);
        if isempty(DIM), DIM=1; end;
end;

% ---- raw sums ------------------------------------------------------------
[R.SUM,R.N,R.SSQ] = sumskipnan(i,DIM);  % sum, number of elements, sum of squares
R.S4P  = sumskipnan(i.^4,DIM);          % sum of 4th power

R.MEAN = R.SUM./R.N;                    % mean
R.MSQ  = R.SSQ./R.N;                    % mean square
R.RMS  = sqrt(R.MSQ);                   % root mean square
% sum of squares with the mean removed; written with real/imag parts so
% that it also holds for complex data
R.SSQ0 = R.SSQ - real(R.SUM).*real(R.MEAN) - imag(R.SUM).*imag(R.MEAN);

% ---- unbiased estimates --------------------------------------------------
n1     = max(R.N-1,0);                  % in case of n=0 and n=1, the (biased) variance, STD and SEM are INF

R.VAR  = R.SSQ0./n1;                    % variance (unbiased)
R.STD  = sqrt(R.VAR);                   % standard deviation
R.SEM  = sqrt(R.SSQ0./(R.N.*n1));       % standard error of the mean
R.SEV  = sqrt(n1.*(n1.*R.S4P./R.N+(R.N.^2-2*R.N+3).*(R.SSQ./R.N).^2)./(R.N.^3)); % standard error of the variance
R.COEFFICIENT_OF_VARIATION = R.STD./R.MEAN;

% NOTE: median, quartiles, mode and the Pearson/Bowley skewness
% coefficients are not computed here and are not fields of R.

% ---- central moments -----------------------------------------------------
R.CM2  = R.SSQ0./n1;
szi    = size(i);
szm    = [size(R.MEAN),1];
% from here on, i holds the data with the mean removed along DIM
i      = i - repmat(R.MEAN,szi./szm(1:length(szi)));
R.CM3  = sumskipnan(i.^3,DIM)./n1;
R.CM4  = sumskipnan(i.^4,DIM)./n1;

R.SKEWNESS = R.CM3./(R.STD.^3);
R.KURTOSIS = R.CM4./(R.VAR.^2)-3;
R.MAD  = sumskipnan(abs(i),DIM)./n1;    % mean absolute deviation

R.datatype = 'STAT Level 3';

% ---- select the output(s) ------------------------------------------------
if iscell(fun) || (ischar(fun) && ~isempty(fun)),
        if ischar(fun), fun = {fun}; end;
        for k = 1:length(fun),
                if strncmp(fun{k},'CM',2)
                        % 'CM#': central moment of arbitrary order #
                        oo = str2double(fun{k}(3:length(fun{k})));
                        varargout{k} = sumskipnan(i.^oo,DIM)./n1;
                else
                        name = upper(fun{k});
                        % the documented names 'meansq' and 'sumsq' are
                        % stored in the fields MSQ and SSQ
                        if strcmp(name,'MEANSQ'),
                                name = 'MSQ';
                        elseif strcmp(name,'SUMSQ'),
                                name = 'SSQ';
                        end;
                        varargout{k} = getfield(R,name);
                end;
        end;
else
        % no statistic selected: return the struct with all results
        varargout{1} = R;
end;
NaN/inst/ttest2.m0000664002356700235670000001011711553522126014324 0ustar schloeglschloeglfunction [h, pval, ci, stats, df] = ttest2 (x, y, alpha, tail, vartype, DIM) % TTEST2 (unpaired) t-test % For two samples x and y from normal distributions with unknown % means and unknown equal variances, perform a two-sample t-test of % the null hypothesis of equal means. Under the null, the test % statistic T follows a Student distribution with DF degrees of % freedom. % % TTEST2 treats NaNs as "Missing values" and ignores these. % % H = ttest2(x,y) % H = ttest2(x,y,alpha) % H = ttest2(x,y,alpha,tail) % H = ttest2(x,y,alpha,tail,vartype) % H = ttest2(x,y,alpha,tail,vartype,DIM) % [H,PVAL] = ttest2(...) % [h,p,ci,stats] = ttest2(...) % % H=1 indicates a rejection of the Null-hypothesis at a significance % level of alpha (default alpha = 0.05). % % With the optional argument string TAIL, the Alternative of interest % can be selected.
If TAIL is '!=' or '<>' or 'both', the null is tested % against the two-sided Alternative `mean (X) ~= mean (Y)'. If TAIL % is '>' or 'right', the one-sided Alternative `mean (X) > mean (Y)' is used. % Similarly for '<' or 'left', the one-sided Alternative `mean (X) < mean % (Y)' is used. The default is the two-sided case. % % vartype supports only 'equal' (default value); the value 'unequal' is not supported. % % H returns whether the Null-Hypothesis must be rejected. % The p-value of the test is returned in PVAL. % % TTEST2 works on the first non-singleton dimension or on DIM. % % If no output argument is given, the p-value of the test is % displayed. % %%% not supported yet % [h,p,ci] = ttest2(...) % [h,p,ci,stats] = ttest2(...) % $Id$ % Copyright (C) 1995, 1996, 1997, 1998, 2000, 2002, 2005, 2006, 2007 % Kurt Hornik % Copyright (C) 2010 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software: you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation, either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program. If not, see <http://www.gnu.org/licenses/>.
% ---- check and complete the input arguments ------------------------------
if ((nargin < 2) || (nargin > 6) || nargout > 4)
        print_usage ;
end

if (nargin < 3) || isempty(alpha)
        alpha = .05;
end

if (nargin < 4) || isempty(tail)
        tail = '~=';
end
if (~ ischar (tail))
        error ('ttest2: tail must be a string');
end
if (nargin < 5) || isempty(vartype)
        vartype = 'equal';
end
if ~strcmp(vartype,'equal')
        error ('ttest2: vartype not supported');
end
if nargin<6,
        % default: first dimension of x with more than one element
        DIM = find(size(x)>1,1);
end;
if isempty(DIM), DIM=1; end;

% x and y must have the same size in every dimension except DIM
szx = size(x);
szy = size(y);
szy(DIM) = 1;
szx(DIM) = 1;
if (any(szx-szy))
        error ('ttest2: dimension of X and Y do not fit');
end

% ---- test statistic ------------------------------------------------------
% sums and sample counts along DIM (NaNs are skipped by sumskipnan)
[SX, NX] = sumskipnan(x, DIM);
[SY, NY] = sumskipnan(y, DIM);
stats.df = NX + NY - 2;
MX = SX ./ NX;
MY = SY ./ NY;

if any(size(x)==0) || any(size(y)==0)
        v = NaN;
else
        % pooled sum of squared deviations from the group means.
        % DIM is passed explicitly: without it sumsq picks the first
        % non-singleton dimension of its argument, which differs from DIM
        % e.g. for ttest2(x,y,[],[],[],2) with matrix input.
        v = sumsq(x-repmat(MX,size(x)./size(MX)), DIM) + sumsq(y-repmat(MY,size(y)./size(MY)), DIM);
end;
stats.sd    = sqrt(v/stats.df);
stats.tstat = (MX - MY) .* sqrt ((NX .* NY .* stats.df) ./ (v .* (NX + NY)));
cdf         = tcdf (stats.tstat, stats.df);

% ---- p-value for the selected alternative --------------------------------
if (strcmp (tail, '~=') || strcmp (tail, '!=') || strcmp (tail, '<>')) || strcmp(tail,'both'),
        pval = 2 * min (cdf, 1 - cdf);
elseif strcmp (tail, '>') || strcmp(tail,'right'),
        pval = 1 - cdf;
elseif strcmp (tail, '<') || strcmp(tail,'left'),
        pval = cdf;
else
        error ('ttest2: option %s not recognized', tail);
end

h = pval < alpha;
if (nargout == 0)
        fprintf(1,' pval: %g\n', pval);
end
NaN/inst/normpdf.m0000664002356700235670000000327611656313737014566 0ustar schloeglschloeglfunction p = normpdf(x,m,s) % NORMPDF returns normal probability density % % pdf = normpdf(x,m,s); % % Computes the PDF of a the normal distribution % with mean m and standard deviation s % default: m=0; s=1; % x,m,s must be matrices of same size, or any one can be a scalar.
% % see also: NORMCDF, NORMINV % Reference(s): % $Id: normpdf.m 9033 2011-11-08 20:58:07Z schloegl $ % Copyright (C) 2000-2003,2010,2011 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . if nargin==1, m=0;s=1; elseif nargin==2, s=1; end; % allocate output memory and check size of argument z = (x-m)./s; % if this line causes an error, input arguments do not fit. 
%p = ((2*pi)^(-1/2))*exp(-z.^2/2)./s; SQ2PI = 2.5066282746310005024157652848110; p = exp(-z.^2/2)./(s*SQ2PI); p((x==m) & (s==0)) = inf; p(isinf(z)~=0) = 0; p(isnan(x) | isnan(m) | isnan(s) | (s<0)) = nan; %!assert(sum(isnan(normpdf([-inf,-2,-1,-.5,0,.5,1,2,3,inf,nan]',2,0))),1) NaN/inst/detrend.m0000664002356700235670000001100711553522126014523 0ustar schloeglschloeglfunction [X,T]=detrend(t,X,p) % DETREND removes the trend from data, NaN's are considered as missing values % % DETREND is fully compatible to previous Matlab and Octave DETREND with the following features added: % - handles NaN's by assuming that these are missing values % - handles unequally spaced data % - second output parameter gives the trend of the data % - compatible to Matlab and Octave % % [...]=detrend([t,] X [,p]) % removes trend for unequally spaced data % t represents the time points % X(i) is the value at time t(i) % p must be a scalar % % [...]=detrend(X,0) % [...]=detrend(X,'constant') % removes the mean % % [...]=detrend(X,p) % removes polynomial of order p (default p=1) % % [...]=detrend(X,1) - default % [...]=detrend(X,'linear') % removes linear trend % % [X,T]=detrend(...) % % X is the detrended data % T is the removed trend % % see also: SUMSKIPNAN, ZSCORE % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . 
% Copyright (C) 1995, 1996 Kurt Hornik % $Id: detrend.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2001,2007 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ if (nargin == 1) p = 1; X = t; t = []; elseif (nargin == 2) if strcmpi(X,'constant'), p = 0; X = t; t = []; elseif strcmpi(X,'linear'), p = 1; X = t; t = []; elseif ischar(X) error('unknown 2nd input argument'); elseif all(size(X)==1), p = X; X = t; t = []; else p = 1; end; elseif (nargin == 3) if ischar(X), warning('input arguments are not supported'); end; elseif (nargin > 3) fprintf (1,'usage: detrend (x [, p])\n'); end; % check data, must be in culomn order [m, n] = size (X); if (m == 1) X = X'; r=n; else r=m; end % check time scale if isempty(t), t = (1:r).'; % make time scale elseif ~all(size(t)==size(X)) t = t(:); end; % check dimension of t and X if ~all(size(X,1)==size(t,1)) fprintf (2,'detrend: size(t,1) must same as size(x,1) \n'); end; % check the order of the polynomial if (~(all(size(p)==1) && (p == round (p)) && (p >= 0))) fprintf (2,'detrend: p must be a nonnegative integer\n'); end if (nargout>1) , % needs more memory T = zeros(size(X))+nan; %T=repmat(nan,size(X)); % not supported by Octave 2.0.16 if (size(t,2)>1), % for multiple time scales for k=1:size(X,2), idx=find(~isnan(X(:,k))); b = (t(idx,k) * ones (1, p + 1)) .^ (ones (length(idx),1) * (0 : p)); T(idx,k) = b * (b \ X(idx,k)); end; else % if only one time scale is used b = (t * ones (1, p + 1)) .^ (ones (length(t),1) * (0 : p)); for k=1:size(X,2), idx=find(~isnan(X(:,k))); T(idx,k) = b(idx,:) * (b(idx,:) \ X(idx,k)); %X(idx,k) = X(idx,k) - T(idx,k); % 1st alternative implementation %X(:,k) = X(:,k) - T(:,k); % 2nd alternative end; end; X = X-T; % 3nd alternative if (m == 1) X = X'; T = T'; end else % needs less memory if (size(t,2)>1), % for multiple time scales for k = 1:size(X,2), idx = find(~isnan(X(:,k))); b = (t(idx,k) * ones (1, p + 1)) .^ (ones (length(idx),1) * 
(0 : p)); X(idx,k) = X(idx,k) - b * (b \ X(idx,k)); end; else % if only one time scale is used b = (t * ones (1, p + 1)) .^ (ones (length(t),1) * (0 : p)); for k = 1:size(X,2), idx = find(~isnan(X(:,k))); X(idx,k) = X(idx,k) - b(idx,:) * (b(idx,:) \ X(idx,k)); end; end; if (m == 1) X = X'; end end; NaN/inst/normcdf.m0000664002356700235670000000322312532773557014545 0ustar schloeglschloeglfunction p = normcdf(x,m,s) % NORMCDF returns normal cumulative distribtion function % % cdf = normcdf(x,m,s); % % Computes the CDF of a the normal distribution % with mean m and standard deviation s % default: m=0; s=1; % x,m,s must be matrices of same size, or any one can be a scalar. % % see also: NORMPDF, NORMINV % Reference(s): % $Id: normcdf.m 12812 2015-06-01 06:10:55Z schloegl $ % Copyright (C) 2000-2003,2010,2011 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . if nargin==1, m=0; s=1; elseif nargin==2, s=1; end; % allocate output memory and check size of arguments z = (x-m)./s; % if this line causes an error, input arguments do not fit. 
% z is the standardized argument (x-m)./s
p = erfc(z/-sqrt(2))/2;

% degenerate case s==0: z is -Inf, NaN or +Inf there, so the step
% function is assigned explicitly. At x==m the value 0.5 is used, which
% is what the normal CDF evaluates to at its mean for every s>0.
% NOTE(review): these three assignments were reconstructed after the
% comparison operators had been lost; the self-test below requires that
% x==m with s==0 does not yield NaN - confirm 0.5 against upstream.
z = (s==0);
p((x<m) & z)  = 0;
p((x==m) & z) = 0.5;
p((x>m) & z)  = 1;

% missing values and invalid standard deviations yield NaN
p(isnan(x) | isnan(m) | isnan(s) | (s<0)) = nan;

%!assert(sum(isnan(normcdf([-inf,-2,-1,-.5,0,.5,1,2,3,inf,nan]',2,0))),1)
NaN/inst/fss.m0000664002356700235670000001270411660500702013671 0ustar schloeglschloeglfunction [idx,score] = fss(D,cl,N,MODE) % FSS - feature subset selection and feature ranking % the method is motivated by the max-relevance-min-redundancy (mRMR) % approach [1]. However, the default method uses partial correlation, % which has been developed from scratch. PCCM [3] describes % a similar idea, but is more complicated. % An alternative method based on FSDD is implemented, too. % % [idx,score] = fss(D,cl) % [idx,score] = fss(D,cl,MODE) % [idx,score] = fss(D,cl,MODE) % % D data - each column represents a feature % cl classlabel % Mode 'Pearson' [default] correlation % 'rank' correlation % 'FSDD' feature selection algorithm based on a distance discriminant [2] % %%% 'MRMR','MID','MIQ' max-relevance, min redundancy [1] - not supported yet. % % score score of the feature % idx ranking of the feature % [tmp,idx]=sort(-score) % % see also: TRAIN_SC, XVAL, ROW_COL_DELETION % % REFERENCES: % [1] Peng, H.C., Long, F., and Ding, C., % Feature selection based on mutual information: criteria of max-dependency, max-relevance, and min-redundancy, % IEEE Transactions on Pattern Analysis and Machine Intelligence, % Vol. 27, No. 8, pp.1226-1238, 2005. % [2] Jianning Liang, Su Yang, Adam Winstanley, % Invariant optimal feature selection: A distance discriminant and feature ranking based solution, % Pattern Recognition, Volume 41, Issue 5, May 2008, Pages 1429-1439. % ISSN 0031-3203, DOI: 10.1016/j.patcog.2007.10.018. % [3] K. Raghuraj Rao and S.
Lakshminarayanan % Partial correlation based variable selection approach for multivariate data classification methods % Chemometrics and Intelligent Laboratory Systems % Volume 86, Issue 1, 15 March 2007, Pages 68-81 % http://dx.doi.org/10.1016/j.chemolab.2006.08.007 % $Id: fss.m 9104 2011-11-15 15:14:10Z carandraug $ % Copyright (C) 2009,2010 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . 
if nargin<3 MODE = []; N = []; elseif ischar(N) MODE = N; N = []; elseif nargin<4, MODE = []; end if isempty(N), N = size(D,2); end score = repmat(NaN,1,size(D,2)); if 0, %strcmpi(MODE,'MRMR') || strcmpi(MODE,'MID') || strcmpi(MODE,'MIQ'); %% RMRM/MID/MIQ is not supported %% TODO: FIXME [tmp,t] = sort([cl,D]); cl = t(:,1:size(cl,2)); D = t(:,1:size(D,2)); for k = 1:N, V(k) = mi(cl, D(:,k)); for m = 1:N, W(k,m) = mi(D(:,m), D(:,k)); end MID(k) = V(k) - mean(W(k,:)); MIQ(k) = V(k) / mean(W(k,:)); end if strcmpi(MODE,'MIQ') [score,idx] = sort(MIQ,[],'descend'); else [score,idx] = sort(MID,[],'descend'); end elseif strcmpi(MODE,'FSDD'); [b,i,j]=unique(cl); for k=1:length(b) n(k,1) = sum(j==k); m(k,:) = mean(D(j==k,:),1); v(k,:) = var(D(j==k,:),1); end m0 = mean(m,1,n); v0 = var(D,[],1); s2 = mean(m.^2,1,n) - m0.^2; score = (s2 - 2*mean(v,1,n)) ./ v0; [t,idx] = sort(-score); elseif isempty(MODE) || strcmpi(MODE,'rank') || strcmpi(MODE,'Pearson') cl = cat2bin(cl); if strcmpi(MODE,'rank'), [tmp,D] = sort(D,1); end idx = repmat(NaN,1,N); for k = 1:N, f = isnan(score); %%%%% compute partial correlation (X,Y|Z) % r = partcorrcoef(cl, D(:,f), D(:,~f)); % obsolete, not very robust %% this is a more robust version X = cl; Y = D(:,f); Z = D(:,~f); if (k>1) X = X-Z*(Z\X); Y = Y-Z*(Z\Y); end r = corrcoef(X,Y); [s,ix] = max(sumsq(r,1)); f = find(f); idx(k) = f(ix); score(idx(k)) = s; end end end function I = mi(x,y) ix = ~any(isnan([x,y]),2); H = sparse(x(ix),y(ix)); pij = H./sum(ix); Iij = pij.*log2(pij./(sum(pij,2)*sum(pij,1))); Iij(isnan(Iij)) = 0; I = sum(Iij(:)); end NaN/inst/skewness.m0000664002356700235670000000412111553522126014737 0ustar schloeglschloeglfunction R = skewness(i,DIM) % SKEWNESS estimates the skewness % % y = skewness(x,DIM) % calculates skewness of x in dimension DIM % % DIM dimension % 1: STATS of columns % 2: STATS of rows % default or []: first DIMENSION, with more than 1 element % % features: % - can deal with NaN's (missing values) % - dimension 
argument % - compatible to Matlab and Octave % % see also: SUMSKIPNAN, STATISTIC % % REFERENCE(S): % http://mathworld.wolfram.com/ % $Id: skewness.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2000-2003,2010 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . % check input arguments if nargin==1, DIM = find(size(i)>1,1); if isempty(DIM), DIM=1; end; end; [R.SUM,R.N,R.SSQ] = sumskipnan(i,DIM); % sum R.MEAN = R.SUM./R.N; % mean R.SSQ0 = R.SSQ - real(R.SUM).*real(R.MEAN) - imag(R.SUM).*imag(R.MEAN); % sum square with mean removed %if flag_implicit_unbiased_estim; %% ------- unbiased estimates ----------- n1 = max(R.N-1,0); % in case of n=0 and n=1, the (biased) variance, STD and SEM are INF %else % n1 = R.N; %end; R.VAR = R.SSQ0./n1; % variance (unbiased) R.STD = sqrt(R.VAR); % standard deviation i = i - repmat(R.MEAN,size(i)./size(R.MEAN)); R.CM3 = sumskipnan(i.^3,DIM)./n1; %R.CM4 = sumskipnan(i.^4,DIM)./n1; R = R.CM3./(R.STD.^3); %R = R.CM4./(R.VAR.^2)-3; NaN/inst/flag_accuracy_level.m0000664002356700235670000000642511601145313017051 0ustar schloeglschloeglfunction FLAG = flag_accuracy_level(i) % FLAG_ACCURACY_LEVEL sets and gets accuracy level % used in SUMSKIPNAN_MEX and COVM_MEX % The error margin of the naive summation is N*eps (N is the number of samples), % the error margin is only 2*eps if Kahan's summation 
is used [1]. % % 0: maximum speed [default] % accuracy of double (64bit) with naive summation (error = N*2^-52) % 1: accuracy of extended (80bit) with naive summation (error = N*2^-64) % 2: accuracy of double (64bit) with Kahan summation (error = 2^-52) % 3: accuracy of extended (80bit) with Kahan summation (error = 2^-64) % % Please note, level 3 might be equally accurate but slower than 1 or 2 on % some platforms. In order to determine what is good for you, you might want % to run ACCTEST. % % FLAG = flag_accuracy_level() % gets current level % flag_accuracy_level(FLAG) % sets accuracy level % % see also: ACCTEST % % Reference: % [1] David Goldberg, % What Every Computer Scientist Should Know About Floating-Point Arithmetic % ACM Computing Surveys, Vol 23, No 1, March 1991. % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA % $Id$ % Copyright (C) 2009 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ persistent FLAG_ACCURACY_LEVEL; %% if strcmp(version,'3.6'), FLAG_ACCURACY_LEVEL=1; end; %% hack for the use with Freemat3.6 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% set the default accuracy level for your platform, ACCTEST might help to determine the optimum for your platform. 
%% If you use Matlab, use level 0 or 2; 1 and 3 are much slower but do not show a better accuracy %% Octave seems to be able to use all 4 levels, were the differences of accuracy between succeeding levels become smaller DEFAULT_ACCURACY_LEVEL = 0; %% maximum speed, accuracy sufficient for most needs. %% DEFAULT_ACCURACY_LEVEL = 2; %% slower, but better accuracy for: AMDx64 Opteron, Phenom, Intel Pentium %% DEFAULT_ACCURACY_LEVEL = 1; %% slower, but better accuracy for: Octave on Intel Atom (no improvement with Matlab, just slower) %% DEFAULT_ACCURACY_LEVEL = 3; %% similar accuracy than 1 or 2 (depending on platform) but even slower. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% set DEFAULT value of FLAG if isempty(FLAG_ACCURACY_LEVEL), FLAG_ACCURACY_LEVEL = DEFAULT_ACCURACY_LEVEL; end; if nargin>0, if (i>3), i=3; end; if (i<0), i=0; end; FLAG_ACCURACY_LEVEL = double(i); end; FLAG = FLAG_ACCURACY_LEVEL; NaN/inst/sumsq.m0000664002356700235670000000263411553522126014254 0ustar schloeglschloeglfunction [o]=sumsq(x,DIM) % SUMSQ calculates the sum of squares. % % [y] = sumsq(x [, DIM]) % % DIM dimension % N STD of N-th dimension % default or []: first DIMENSION, with more than 1 element % % y estimated standard deviation % % features: % - can deal with NaN's (missing values) % - dimension argument also in Octave % - compatible to Matlab and Octave % % see also: RMS, SUMSKIPNAN, MEAN, VAR, MEANSQ, % % % References(s): % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. 
% % You should have received a copy of the GNU General Public License % along with this program; If not, see . % $Id$ % Copyright (C) 2009,2010 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ if nargin<2, DIM = []; end; if isempty(DIM), DIM = find(size(x)>1,1); if isempty(DIM), DIM=1; end; end; [s,n,o] = sumskipnan(x,DIM); NaN/inst/tinv.m0000664002356700235670000000347211656313737014077 0ustar schloeglschloegl
function y = tinv(x,n)
% TINV returns the inverse cumulative distribution function (quantile
% function) of Student's t-distribution
%
% y = tinv(p,v);
%
% Computes the quantile (inverse of the CDF) of Student's
% t-distribution with v degrees of freedom.
% p,v must be arrays of the same size, or any one can be a scalar.
%
% The result is NaN wherever v is not positive (including v=NaN).
% For v >= 10000 the value of NORMINV(p) is returned unchanged.
%
% see also: TPDF, TCDF, NORMPDF, NORMCDF, NORMINV

% Reference(s):

% $Id: tinv.m 9033 2011-11-08 20:58:07Z schloegl $
% Copyright (C) 2000-2003,2009 by Alois Schloegl
% This function is part of the NaN-toolbox
% http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation; either version 2 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; If not, see <http://www.gnu.org/licenses/>.

% allocate output memory and check size of arguments
if all(size(x)==1)
    x = repmat(x,size(n));
elseif all(size(n)==1)
    n = repmat(n,size(x));
elseif isequal(size(x),size(n))
    % isequal (not ==) so that inputs with a different number of
    % dimensions reach the error below instead of failing in the comparison
    ;   %% OK, do nothing
else
    error('size of input arguments must be equal or scalar')
end;

% start from the normal quantile; it is kept as the result where n >= 10000
y = norminv(x);
y(~(n>0)) = NaN;    % n must be positive; ~(n>0) also catches n=NaN

% remaining elements: quantile expressed through the inverse beta function
ix = find(~isnan(x) & (n>0) & (n<10000));
if ~isempty(ix)
    y(ix) = (sign(x(ix) - 1/2).*sqrt(n(ix)./betainv(2*min(x(ix), 1-x(ix)), n(ix)/2, 1/2) - n(ix)));
end;
y = reshape(y,size(x));

%!assert(tinv(NaN,4),NaN)
NaN/inst/partcorrcoef.m0000664002356700235670000001226211601145313015564 0ustar schloeglschloeglfunction [R,sig,ci1,ci2] = partcorrcoef(X,Y,Z,Mode) % PARTCORRCOEF calculates the partial correlation between X and Y % after removing the influence of Z. % X, Y and Z can contain missing values encoded with NaN. % NaN's are skipped, NaN do not result in a NaN output. % (Its assumed that the occurence of NaN's is uncorrelated) % The output gives NaN, only if there are insufficient input data. % % The partial correlation is defined as % pcc(xy|z)=(cc(x,y)-cc(x,z)*cc(y,z))/sqrt((1-cc(x,z)^2)*(1-cc(y,z)^2)) % % % PARTCORRCOEF(X [,Mode]); % calculates the (auto-)correlation matrix of X % PARTCORRCOEF(X,Y,Z); % PARTCORRCOEF(X,Y,Z,[]); % PARTCORRCOEF(X,Y,Z,'Pearson'); % PARTCORRCOEF(X,Y,Z,'Rank'); % PARTCORRCOEF(X,Y,Z,'Spearman'); % % Mode=[] [default] % removes from X and Y the part that can be explained by Z % and computes the correlation of the remaining part. % Ideally, this is equivalent to Mode='Pearson', however, in practice % this is more accurate. % Mode='Pearson' or 'parametric' % Mode='Spearman' % Mode='Rank' % computes the partial correlation based on cc(x,y),cc(x,z) and cc(y,z) % with the respective mode. % % [R,p,ci1,ci2] = PARTCORRCOEF(...); % r is the partialcorrelation matrix % r(i,j) is the partial correlation coefficient r between X(:,i) and Y(:,j) % when influence of Z is removed.
% p gives the significance of PCC % It tests the null hypothesis that the product moment correlation coefficient is zero % using Student's t-test on the statistic t = r sqrt(N-Nz-2)/sqrt(1-r^2) % where N is the number of samples (Statistics, M. Spiegel, Schaum series). % p > alpha: do not reject the Null hypothesis: "R is zero". % p < alpha: The alternative hypothesis "R2 is larger than zero" is true with probability (1-alpha). % ci1 lower 0.95 confidence interval % ci2 upper 0.95 confidence interval % % see also: SUMSKIPNAN, COVM, COV, COR, SPEARMAN, RANKCORR, RANKS, CORRCOEF % % REFERENCES: % on the partial correlation coefficient % [1] http://www.tufts.edu/~gdallal/partial.htm % [2] http://www.nag.co.uk/numeric/fl/manual/pdf/G02/g02byf.pdf % $Id: partcorrcoef.m 8351 2011-06-24 17:35:07Z carandraug $ % Copyright (C) 2000-2002,2009 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. 
% % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA % Features: % + interprets NaN's as missing value % + Pearson's correlation % + Spearman's rank correlation % + Rank correlation (non-parametric, non-Spearman) % + is fast, using an efficient algorithm O(n.log(n)) for calculating the ranks % + significance test for null-hypthesis: r=0 % + confidence interval (0.99) included % - rank correlation works for cell arrays, too (no check for missing values). % + compatible with Octave and Matlab if nargin==3 Mode=[]; elseif nargin==4, else error('Error PARTCORRCOEF: Missing argument(s)\n'); end; if isempty(Z) R = corrcoef(X,Y,Mode); elseif isempty(Mode) if ~isempty(Z) for j=1:size(X,2) ix = ~any(isnan(Z),2) & ~isnan(X(:,j)); X(:,j) = X(:,j) - Z*(Z(ix,:)\X(ix,j)); end; for j=1:size(Y,2) ix = ~any(isnan(Z),2) & ~isnan(Y(:,j)); Y(:,j) = Y(:,j) - Z*(Z(ix,:)\Y(ix,j)); end; end; R = corrcoef(X,Y,Mode); else rxy = corrcoef(X,Y,Mode); rxz = corrcoef(X,Z,Mode); if isempty(Y), ryz = rxz; else ryz = corrcoef(Y,Z,Mode); end; %rxy,rxz,ryz R = (rxy-rxz*ryz')./sqrt((1-rxz.^2)*(1-ryz.^2)'); end; if nargout<2, return, end; % SIGNIFICANCE TEST %warning off; % prevent division-by-zero warnings in Matlab. NN=size(X,1)-size(Z,2); tmp = 1 - R.*R; tmp(tmp<0) = 0; % prevent tmp<0 i.e. 
imag(t)~=0 t = R.*sqrt(max(NN-2,0)./tmp); if exist('t_cdf','file') sig = t_cdf(t,NN-2); elseif exist('tcdf','file') sig = tcdf(t,NN-2); else fprintf('Warning CORRCOEF: significance test not completed because of missing TCDF-function\n') sig = repmat(nan,size(R)); end; sig = 2 * min(sig,1 - sig); if nargout<3, return, end; % CONFIDENCE INTERVAL if exist('flag_implicit_significance','file'), alpha = flag_implicit_significance; else alpha = 0.01; end; fprintf(1,'CORRCOEF: confidence interval is based on alpha=%f\n',alpha); tmp = R; %tmp(ix1 | ix2) = nan; % avoid division-by-zero warning z = log((1+tmp)./(1-tmp))/2; % Fisher's z-transform; %sz = 1./sqrt(NN-3); % standard error of z sz = sqrt(2)*erfinv(1-2*alpha)./sqrt(NN-3); % confidence interval for alpha of z ci1 = tanh(z-sz); ci2 = tanh(z+sz); NaN/inst/xptopen.m0000664002356700235670000000413712264570762014612 0ustar schloeglschloegl% XPTOPEN read of several file formats and writing of the SAS Transport Format (*.xpt) % Supported are ARFF, SAS-XPT and STATA files. % XPTOPEN is a mex-file and must be compiled before use. % More detailed help can be obtained by the command % xptopen % without an additional argument % % X = xptopen(filename) % X = xptopen(filename,'r') % read file with filename and return variables in struct X % % X = xptopen(filename,'w',X) % save fields of struct X in filename. % % The fields of X must be column vectors of equal length. % Each vector is either a numeric vector or a cell array of strings. % The SAS-XPT format stores Date/Time as numeric value counting the number of days since 1960-01-01. 
% % References: % [1] TS-140 THE RECORD LAYOUT OF A DATA SET IN SAS TRANSPORT (XPORT) FORMAT % http://support.sas.com/techsup/technote/ts140.html % [2] IBM floating point format % http://en.wikipedia.org/wiki/IBM_Floating_Point_Architecture % [3] see http://old.nabble.com/Re%3A-IBM-integer-and-double-formats-p20428979.html % [4] STATA File Format % http://www.stata.com/help.cgi?dta % http://www.stata.com/help.cgi?dta_113 % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . % $Id: xptopen.m 12495 2014-01-12 19:50:10Z schloegl $ % Copyright (C) 2010,2011,2012,2014 by Alois Schloegl % This is part of the NaN-toolbox. For more details see % http://pub.ist.ac.at/~schloegl/matlab/NaN/ if exist('xptopen','file')~=3 error('xptopen.mex is not compiled') end; NaN/inst/nanconv.m0000664002356700235670000000411611531311060014530 0ustar schloeglschloeglfunction [C,N,c] = nanconv(X,Y,arg3) % NANCONV computes the convolution for data with missing values. % X and Y can contain missing values encoded with NaN. % NaN's are skipped, NaN do not result in a NaN output. % The output gives NaN only if there are insufficient input data % % [...] = NANCONV(X,Y); % calculates 2-dim convolution between X and Y % [C] = NANCONV(X,Y); % % WARNING: missing values can introduce aliasing - causing unintended results. % Moreover, the behavior of bandpass and highpass filters in case of missing values % is not fully understood, and might contain some pitfalls. 
% % see also: CONV, NANCONV2, NANFFT, NANFILTER % $Id: conv2nan.m 6973 2010-02-28 20:19:12Z schloegl $ % Copyright (C) 2000-2005,2010,2011 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ and % http://octave.svn.sourceforge.net/viewvc/octave/trunk/octave-forge/extra/NaN/inst/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . warning('NANCONV is experimental. For more details see HELP NANCONV'); if nargin~=2, fprintf(2,'Error NANCONV2: incorrect number of input arguments\n'); end; m = isnan(X); n = isnan(Y); X(m) = 0; Y(n) = 0; C = conv(X,Y); % 2-dim convolution N = conv(real(~m),real(~n)); % normalization term c = conv(ones(size(X)),ones(size(Y))); % correction of normalization if nargout==1, C = C.*c./N; elseif nargout==2, N = N./c; end; NaN/inst/zScoreMedian.m0000664002356700235670000000332511520323414015456 0ustar schloeglschloegl
function Z = zScoreMedian(X, DIM)
% zScoreMedian removes the median and standardizes by the 1.483*median absolute deviation
%
% Usage:  Z = zScoreMedian(X, DIM)
% Input:  X  : data
%         DIM: dimension along which z-score should be calculated (1=columns, 2=rows)
%              (optional, default=first dimension with more than 1 element)
% Output: Z  : z-scores, same size as X.
%              If X is empty, Z is the (empty) input itself.

% Copyright (C) 2003 Patrick Houweling
% Copyright (C) 2009 Alois Schloegl
% $Id: zScoreMedian.m 8075 2011-01-27 17:10:36Z schloegl $
% This function is part of the NaN-toolbox
% http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program. If not, see <http://www.gnu.org/licenses/>.

% input checks
if any(size(X)==0),
    % nothing to standardize; the output must still be defined,
    % otherwise the caller fails on an unassigned return value
    Z = X;
    return;
end;

% robust moment estimators:
% - mean: median
% - standard deviation: 1.483 * median absolute deviation (medAbsDev);
%   the factor 1.483 is the ratio of the standard deviation of a normal random variable to its MAD.
if nargin<2,
    [D, M] = medAbsDev(X);
else
    [D, M] = medAbsDev(X, DIM);
end;

% z-score: subtract M and divide by 1.483*D
% (M and D are replicated along the reduced dimension to the size of X)
Z = (X - repmat(M, size(X)./size(M))) ./ repmat(1.483*D, size(X)./size(D));
NaN/inst/coefficient_of_variation.m0000664002356700235670000000270011553522126020114 0ustar schloeglschloeglfunction cv=coefficient_of_variation(i,DIM) % COEFFICIENT_OF_VARIATION returns STD(X)/MEAN(X) % % cv=coefficient_of_variation(x [,DIM]) % cv=std(x)/mean(x) % % see also: SUMSKIPNAN, MEAN, STD % % REFERENCE(S): % http://mathworld.wolfram.com/VariationCoefficient.html % $Id: coefficient_of_variation.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 1997-2003 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version.
% % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . if nargin<2, DIM = find(size(i)>1,1); if isempty(DIM), DIM=1; end; end; [S,N,SSQ] = sumskipnan(i,DIM); % sqrt((SSQ-S.*S./N)./max(N-1,0))/(S./N); % = std(i)/mean(i) cv = sqrt(SSQ.*N./(S.*S)-1); %if flag_implicit_unbiased_estim, cv = cv.*sqrt(N./max(N-1,0)); %end; NaN/inst/rankcorr.m0000664002356700235670000000342011553522126014717 0ustar schloeglschloeglfunction r = rankcorr(X,Y) % RANKCORR calculated the rank correlation coefficient. % This function is replaced by CORRCOEF. % Significance test and confidence intervals can be obtained from CORRCOEF, too. % % R = CORRCOEF(X, [Y, ] 'Rank'); % % The rank correlation r = corrcoef(ranks(x)). % is often confused with Spearman's rank correlation. % Spearman's correlation is defined as % r(x,y) = 1-6*sum((ranks(x)-ranks(y)).^2)/(N*(N*N-1)) % The results are different. Here, the former version is implemented. % % see also: CORRCOEF, SPEARMAN, RANKS % % REFERENCES: % [1] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html % [2] http://mathworld.wolfram.com/CorrelationCoefficient.html % $Id: rankcorr.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2000-2003 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. 
% % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . % warning('RANKCORR might become obsolete; use CORRCOEF(ranks(x)) or CORRCOEF(...,''Rank'') instead'); if nargin < 2 r = corrcoef(ranks(X)); else r = corrcoef(ranks(X),ranks(Y)); endNaN/inst/iqr.m0000664002356700235670000000271011553522126013672 0ustar schloeglschloegl
function Q=iqr(Y,DIM)
% IQR calculates the interquartile range
%  Missing values (encoded as NaN) are ignored.
%
%  Q = iqr(Y)
%  Q = iqr(Y,DIM)
%     returns the IQR along dimension DIM of sample array Y.
%     DIM defaults to (or, if [], is replaced by) the first dimension
%     of Y with more than one element.
%
%  Q = iqr(HIS)
%     returns the IQR from the histogram HIS.
%     HIS must be a HISTOGRAM struct as defined in HISTO2 or HISTO3.
%
%  Called without input arguments, the help text is displayed.
%
% see also: MAD, RANGE, HISTO2, HISTO3, PERCENTILE, QUANTILE

% $Id$
% Copyright (C) 2009 by Alois Schloegl
% This function is part of the NaN-toolbox
% http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation; either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; If not, see <http://www.gnu.org/licenses/>.

% the argument check must come first: Y must not be touched when it is missing
if nargin<1,
    help iqr
    return;
end;

if nargin<2,
    DIM = [];
end;
if isempty(DIM),
    DIM = find(size(Y)>1,1);
    if isempty(DIM), DIM = 1; end;
end;

% difference between the 75% and the 25% quantile along DIM
Q = quantile(Y,[1,3]/4,DIM);
Q = diff(Q,[],DIM);
NaN/inst/cor.m0000664002356700235670000000606411553522126013670 0ustar schloeglschloeglfunction [r2] = cor(X,Y); % COR calculates the correlation matrix % X and Y can contain missing values encoded with NaN. % NaN's are skipped, NaN do not result in a NaN output. % (Its assumed that the occurence of NaN's is uncorrelated) % The output gives NaN only if there are insufficient input data % % COR(X); % calculates the (auto-)correlation matrix of X % COR(X,Y); % calculates the crosscorrelation between X and Y % % c = COR(...); % c is the correlation matrix % % W weights to compute weighted mean (default: []) % if W=[], all weights are 1. % number of elements in W must match size(x,DIM) % NOTE: Under certain circumstances (Missing values and small number of samples) % abs(COR) can be larger than 1. % If you need abs(COR)<=1, use CORRCOEF. CORRCOEF garantees abs(COR)<=1. % % see also: SUMSKIPNAN, COVM, COV, CORRCOEF % % REFERENCES: % http://mathworld.wolfram.com/CorrelationCoefficient.html % $Id: cor.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2000-2004,2010 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see .
if nargin==1 Y = []; elseif nargin==0 fprintf(2,'Error COR: Missing argument(s)\n'); end; [r1,c1]=size(X); if (c1>r1), fprintf(2,'Warning COR: Covariance is ill-defined, because of too less observations (rows).\n'); end; [r1,c1]=size(X); if ~isempty(Y) [r2,c2]=size(Y); if r1~=r2, fprintf(2,'Error COR: X and Y must have the same number of observations (rows).\n'); return; end; else [r2,c2]=size(X); end; if (c1>r1) || (c2>r2), fprintf(2,'Warning COR: Covariance is ill-defined, because of too less observations (rows).\n'); end; if ~isempty(Y), [S1,N1,SSQ1] = sumskipnan(X,1); [S2,N2,SSQ2] = sumskipnan(Y,1); NN = double(~isnan(X)')*double(~isnan(Y)); X(isnan(X)) = 0; % skip NaN's Y(isnan(Y)) = 0; % skip NaN's CC = X'*Y; M1 = S1./N1; M2 = S2./N2; cc = CC./NN - M1'*M2; r2 = cc./sqrt((SSQ1./N1-M1.*M1)'*(SSQ2./N2-M2.*M2)); else [S,N,SSQ] = sumskipnan(X,1); NN = double(~isnan(X)')*double(~isnan(X)); X(isnan(X)) = 0; % skip NaN's CC = X'*X; M = S./N; cc = CC./NN - M'*M; v = (SSQ./N- M.*M); %max(N-1,0); r2 = cc./sqrt(v'*v); end; NaN/inst/naninsttest.m0000664002356700235670000001271511553522126015457 0ustar schloeglschloegl% NANINSTTEST checks whether the functions from NaN-toolbox have been % correctly installed. % % see also: NANTEST % $Id: naninsttest.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2000-2003 by Alois Schloegl % This script is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. 
% % You should have received a copy of the GNU General Public License % along with this program; If not, see . r = zeros(38,2); x = [5,NaN,0,1,nan]; % run test, k=1: with NaNs, k=2: all NaN's are removed % the result of both should be the same. %FLAG_WARNING = warning; warning('off'); funlist = {'sumskipnan','mean','std','var','skewness','kurtosis','sem','median','mad','zscore','coefficient_of_variation','geomean','harmmean','meansq','moment','rms','','corrcoef','rankcorr','spearman','ranks','center','trimean','min','max','tpdf','tcdf','tinv','normpdf','normcdf','norminv','nansum','nanstd','histo_mex','sumskipnan_mex','covm_mex','svmtrain_mex','train','','','','','','','',''}; for k=1:2, if k==2, x(isnan(x))=[]; end; r(1,k) =sumskipnan(x(1)); r(2,k) =mean(x); r(3,k) =std(x); r(4,k) =var(x); r(5,k) = skewness(x); r(6,k) =kurtosis(x); r(7,k) =sem(x); r(8,k) =median(x); r(9,k) =mad(x); tmp = zscore(x); r(10,k)=tmp(1); if exist('coefficient_of_variation','file'), r(11,k)=coefficient_of_variation(x); end; r(12,k)=geomean(x); r(13,k)=harmmean(x); if exist('meansq','file'), r(14,k)=meansq(x); end; if exist('moment','file'), r(15,k)=moment(x,6); end; if exist('rms','file'), r(16,k)=rms(x); end; % r(17,k) is currently empty. 
tmp=corrcoef(x',(1:length(x))'); r(18,k)=any(isnan(tmp(:))); if exist('rankcorr','file'), tmp=rankcorr(x',(1:length(x))'); r(19,k)=any(isnan(tmp(:))); end; if exist('spearman','file'), tmp=spearman(x',(1:length(x))'); r(20,k)=any(isnan(tmp(:))); end; if exist('ranks','file'), r(21,k)=any(isnan(ranks(x')))+k; end; if exist('center','file'), tmp=center(x); r(22,k)=tmp(1); end; if exist('trimean','file'), r(23,k)=trimean(x); end; r(24,k)=min(x); r(25,k)=max(x); r(26,k) = k+isnan(tpdf(x(2),4)); try r(27,k) = k*(~isnan(tcdf(nan,4))); catch r(27,k) = k; end; r(28,k) = k*(~isnan(tinv(NaN,4))); if exist('normpdf','file'), fun='normpdf'; elseif exist('normal_pdf','file'), fun='normal_pdf'; end; r(29,k) = (feval(fun,k,k,0)~=Inf)*k; if exist('normcdf','file'), fun='normcdf'; elseif exist('normal_cdf','file'), fun='normal_cdf'; end; r(30,k) = feval(fun,4,4,0); if exist('norminv','file'), fun='norminv'; elseif exist('normal_inv','file'), fun='normal_inv'; end; r(31,k) = k*any(isnan(feval(fun,[0,1],4,0))); if exist('nansum','file'), r(32,k)=k*isnan(nansum(nan)); end; if exist('nanstd','file'), r(33,k)=k*(~isnan(nanstd(0))); end; %%% check mex files try histo_mex([1:5]'); r(34,k)=0; catch; r(34,k)=k; end; try sumskipnan_mex([1:5]'); r(35,k)=0; catch; r(35,k)=k; end; try covm_mex([1:5]'); r(36,k)=0; catch; r(36,k)=k; end; if ~exist('svmtrain_mex','file'), r(37,k)=k; end; if ~exist('train','file'), r(38,k)=k; end; end; % check if result is correct tmp = abs(r(:,1)-r(:,2)). 
% $Id: median.m 9033 2011-11-08 20:58:07Z schloegl $ % Copyright (C) 2000-2003,2009 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ global FLAG_NANS_OCCURED; % check dimension of x sz=size(x); % find the dimension for median if nargin<2, DIM=min(find(sz>1)); if isempty(DIM), DIM=1; end; end; if DIM>length(sz), sz = [sz,ones(1,DIM-length(sz))]; end; D1 = prod(sz(1:DIM-1)); D2 = sz(DIM); D3 = prod(sz(DIM+1:length(sz))); D0 = [sz(1:DIM-1),1,sz(DIM+1:length(sz))]; y = repmat(nan,D0); flag_MexKthElement = exist('kth_element','file')==3; for k = 0:D1-1, for l = 0:D3-1, xi = k + l * D1*sz(DIM) + 1 ; xo = k + l * D1 + 1; t = x(xi+(0:sz(DIM)-1)*D1); t = t(~isnan(t)); n = length(t); if n==0, y(xo) = nan; elseif flag_MexKthElement, if (D1==1) t = t+0.0; end; % make sure a real copy (not just a reference to x) is used flag_KthE = 0; % fast kth_element can be used, because t does not contain any NaN and there is need to care about in-place sorting if ~rem(n,2), y(xo) = sum( kth_element( double(t), n/2 + [0,1], flag_KthE) ) / 2; elseif rem(n,2), y(xo) = kth_element(double(t), (n+1)/2, flag_KthE); end; else t = sort(t); if ~rem(n,2), y(xo) = (t(n/2) + t(n/2+1)) / 2; elseif rem(n,2), y(xo) = t((n+1)/2); end; end if (n % This is part of the NaN-toolbox. For more details see % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . 
if nargin<2, FLAG = 0; end; if nargin<3, DIM = []; end; if isempty(FLAG), FLAG = 0; end; if isempty(DIM), DIM = find(size(x)>1,1); if isempty(DIM), DIM=1; end; end; [y,n,ssq] = sumskipnan(x,DIM); if all(ssq(:).*n(:) > 2*(y(:).^2)), %% rounding error is neglectable y = ssq - y.*y./n; else %% rounding error is not neglectable [y,n] = sumskipnan(center(x,DIM).^2,DIM); end; if (FLAG==1) y = sqrt(y./n); % normalize with N else % default method y = sqrt(y./max(n-1,0)); % normalize with N-1 end; %!assert(nanstd(0),NaN) NaN/inst/flag_nans_occured.m0000664002356700235670000000304611553522126016536 0ustar schloeglschloeglfunction [flag]=flag_nans_occured() % FLAG_NANS_OCCURED checks whether the last call(s) to sumskipnan or covm % contained any not-a-numbers in the input argument. Because many other % functions like mean, std, etc. are also using sumskipnan, % also these functions can be checked for NaN's in the input data. % % A call to FLAG_NANS_OCCURED() resets also the flag whether NaN's occured. % Only sumskipnan or covm can set the flag again. % % see also: SUMSKIPNAN, COVM % $Id$ % Copyright (C) 2009 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software: you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation, either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program. If not, see . 
global FLAG_NANS_OCCURED; %%% check whether FLAG was already defined if isempty(FLAG_NANS_OCCURED), FLAG_NANS_OCCURED = logical(0); % default value end; flag = FLAG_NANS_OCCURED; % return value FLAG_NANS_OCCURED = logical(0); % reset flag return; NaN/inst/geomean.m0000664002356700235670000000322411553522126014513 0ustar schloeglschloeglfunction [y] = geomean(x,DIM,W) % GEOMEAN calculates the geomentric mean of data elements. % % y = geomean(x [,DIM [,W]]) is the same as % y = mean(x,'G' [,DIM]) % % DIM dimension % 1 STD of columns % 2 STD of rows % default or []: first DIMENSION, with more than 1 element % W weights to compute weighted mean (default: []) % if W=[], all weights are 1. % number of elements in W must match size(x,DIM) % % features: % - can deal with NaN's (missing values) % - weighting of data % - dimension argument also in Octave % - compatible to Matlab and Octave % % see also: SUMSKIPNAN, MEAN, HARMMEAN % % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . % $Id: geomean.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2000-2002,2009 by Alois Schloegl % This is part of the NaN-toolbox. 
For more details see % http://pub.ist.ac.at/~schloegl/matlab/NaN/ if nargin<2 DIM=min(find(size(x)>1)); if isempty(DIM), DIM=1; end; end if nargin<3 W = []; end; [y, n] = sumskipnan(log(x),DIM,W); y = exp (y./n); NaN/inst/norminv.m0000664002356700235670000000352111656313737014602 0ustar schloeglschloeglfunction x = norminv(p,m,s) % NORMINV returns inverse cumulative function of the normal distribution % % x = norminv(p,m,s); % % Computes the quantile (inverse of the CDF) of a the normal % cumulative distribution with mean m and standard deviation s % default: m=0; s=1; % p,m,s must be matrices of same size, or any one can be a scalar. % % see also: NORMPDF, NORMCDF % Reference(s): % $Id: norminv.m 9033 2011-11-08 20:58:07Z schloegl $ % Copyright (C) 2000-2003,2010,2011 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . if nargin==1, m=0; s=1; elseif nargin==2, s=1; end; % allocate output memory and check size of arguments x = sqrt(2)*erfinv(2*p - 1).*s + m; % if this line causes an error, input arguments do not fit. x((p>1) | (p<0) | isnan(p) | isnan(m) | isnan(s) | (s<0)) = nan; k = (s==0) & ~isnan(m); % temporary variable, reduces number of tests. 
x((p==0) & k) = -inf; x((p==1) & k) = +inf; k = (p>0) & (p<1) & k; if numel(m)==1, x(k) = m; else x(k) = m(k); end; %!assert(sum(~isnan(norminv([-inf,-.2,0,.2,.5,1,2,inf,nan],2,0))),4) NaN/inst/cat2bin.m0000664002356700235670000000555311656313737014443 0ustar schloeglschloeglfunction [B,BLab]=cat2bin(D, Label, MODE) % CAT2BIN converts categorial into binary data % each category of each column in D is converted into a logical column % % B = cat2bin(C); % [B,BinLabel] = cat2bin(C,Label); % [B,BinLabel] = cat2bin(C,Label,MODE) % % C categorial data % B binary data % Label description of each column in C % BinLabel description of each column in B % MODE default [], ignores NaN % 'notIgnoreNAN' includes binary column for NaN % 'IgnoreZeros' zeros do not get a separate category % 'IgnoreZeros+NaN' zeros and NaN are ignored % % example: % cat2bin([1;2;5;1;5]) results in % 1 0 0 % 0 1 0 % 0 0 1 % 1 0 0 % 0 0 1 % $Id: cat2bin.m 9033 2011-11-08 20:58:07Z schloegl $ % Copyright (C) 2009 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or % modify it under the terms of the GNU General Public License % as published by the Free Software Foundation; either version 3 % of the License, or (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
% CAT2BIN body: expand every column of D into one 0/1 indicator column per category value.
if nargin<2, Label = []; end;	% Label is optional; without a default, [B,BLab]=cat2bin(C) failed on an undefined variable
if nargin<3, MODE = []; end;

% convert data
B = [];
c = 0;		% number of indicator columns already assigned to earlier columns of D
k1 = 0;		% running index into BLab
BLab = [];
for m = 1:size(D,2)
	h = histo_mex(D(:,m));
	x = h.X(h.H>0);		% keep the entries of h.X whose count h.H is positive
	if strcmpi(MODE,'notIgnoreNaN')
		;		% keep every value, including NaN
	elseif strcmpi(MODE,'IgnoreZeros')
		x = x(x~=0);
	elseif strcmpi(MODE,'IgnoreZeros+NaN')
		x = x((x~=0) & (x==x));
	else
		x = x(x==x);	% default: drop NaN (x==x is false only for NaN)
	end;
	for k = 1:size(D,1),
		if ~isnan(D(k,m))
			B(k, c + find(D(k,m)==x)) = 1;
		elseif ~isempty(x) && isnan(x(end)),
			% x can be empty (e.g. an all-NaN column with NaN ignored); x(end) would then be an indexing error.
			% NOTE(review): relies on histo_mex listing NaN as the last entry of X - confirm.
			B(k, c + length(x)) = 1;
		end;
	end;
	c = c + length(x);
	if nargout>1,
		for k = 1:length(x),
			k1 = k1+1;
			if isempty(Label)
				BLab{k1} = ['#',int2str(m),':',int2str(x(k))];
			else
				BLab{k1} = [Label{m},':',int2str(x(k))];
			end;
		end;
	end;
end;

%!assert(cat2bin([1;2;5;1;5]),[1,0,0;0,1,0;0,0,1;1,0,0;0,0,1])
NaN/inst/zscore.m0000664002356700235670000000412112404650514014401 0ustar schloeglschloeglfunction [i,m,s] = zscore(i,OPT, DIM, W) % ZSCORE removes the mean and normalizes data % to a variance of 1. Can be used for pre-whitening of data, too. % % [z,mu, sigma] = zscore(x [,OPT [, DIM]) % z z-score of x along dimension DIM % sigma is the inverse of the standard deviation % mu is the mean of x % % The data x can be reconstucted with % x = z*diag(sigma) + repmat(m, size(z)./size(m)) % z = x*diag(1./sigma) - repmat(m.*v, size(z)./size(m)) % % DIM dimension % 1: STATS of columns % 2: STATS of rows % default or []: first DIMENSION, with more than 1 element % % see also: SUMSKIPNAN, MEAN, STD, DETREND % % REFERENCE(S): % [1] http://mathworld.wolfram.com/z-Score.html % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; If not, see .
% $Id: zscore.m 12708 2014-09-12 20:02:20Z schloegl $
% Copyright (C) 2000-2003,2009,2014 by Alois Schloegl
% This function is part of the NaN-toolbox
% http://pub.ist.ac.at/~schloegl/matlab/NaN/

% Nothing to normalize when any dimension has length 0; the input is handed back unchanged.
if any(size(i)==0), return; end

% Default the optional trailing arguments.
if nargin<4, W   = []; end
if nargin<3, DIM = []; end
if nargin<2, OPT = []; end

if ~isempty(OPT) && ~any(OPT==[0,1])
	error('OPT must be 0, 1 or empty.')
end

if isempty(DIM),
	% first dimension with more than one element, falling back to 1
	DIM = min(find(size(i)>1));
	if isempty(DIM), DIM = 1; end
end

% pre-whitening
[total, count, unused_ssq] = sumskipnan(i, DIM, W);
m = total./count;
i = i - repmat(m, size(i)./size(m));	% remove mean
s = std (i, OPT, DIM, W);
s(s==0) = 1;				% a zero spread would otherwise divide by zero below
i = i ./ repmat(s, size(i)./size(s));	% scale to var=1
NaN/inst/tiedrank.m0000664002356700235670000000307511553522126014705 0ustar schloeglschloeglfunction R=tiedrank(X,DIM) % TIEDRANK compute rank of samples, the mean value is used in case of ties % this function is just a wrapper for RANKS, and provided for compatibility % with the statistics toolbox of matlab(tm) % % R = tiedrank(X) % computes the rank R of vector X % % see also: RANKS % $Id: tiedrank.m 8223 2011-04-20 09:16:06Z schloegl $ % Copyright (C) 2009,2010 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see .
% TIEDRANK body: validate the call, resolve the working dimension, delegate to RANKS.
% The function is declared as tiedrank(X,DIM) and DIM is handled below, so only a
% third input is rejected; the former test (nargin>1) made the DIM handling unreachable.
if nargin>2,
	error('more than 2 input arguments are currently not supported ')
end;
if nargout>1,
	error('more than 1 output argument is currently not supported ')
end;

if nargin<2,
	DIM = [];
end;
if isempty(DIM),
	% default: first dimension with more than one element, else 1
	DIM = find(size(X)>1,1);
	if isempty(DIM), DIM = 1; end;
end
if (DIM<1), DIM = 1; end; %% Hack, because min([])=0 for FreeMat v3.5

R = ranks(X,DIM);
NaN/inst/kurtosis.m0000664002356700235670000000423511553522126014766 0ustar schloeglschloeglfunction R=kurtosis(i,DIM) % KURTOSIS estimates the kurtosis % % y = kurtosis(x,DIM) % calculates kurtosis of x in dimension DIM % % DIM dimension % 1: STATS of columns % 2: STATS of rows % default or []: first DIMENSION, with more than 1 element % % features: % - can deal with NaN's (missing values) % - dimension argument % - compatible to Matlab and Octave % % see also: SUMSKIPNAN, VAR, STD, VAR, SKEWNESS, MOMENT, STATISTIC, % IMPLICIT_SKIP_NAN % % REFERENCE(S): % http://mathworld.wolfram.com/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see .
% $Id: kurtosis.m 8223 2011-04-20 09:16:06Z schloegl $
% Copyright (C) 2000-2003 by Alois Schloegl
% This function is part of the NaN-toolbox for Octave and Matlab
% http://pub.ist.ac.at/~schloegl/matlab/NaN/

% With a single input, work along the first dimension that has more than one element.
if nargin==1,
	DIM = min(find(size(i)>1));
	if isempty(DIM), DIM = 1; end
end

% sum, count and sum of squares along DIM
[s1, n, s2] = sumskipnan(i,DIM);
mu   = s1./n;								% mean
ssq0 = s2 - real(s1).*real(mu) - imag(s1).*imag(mu);	% sum square with mean removed

% unbiased normalisation is always used here;
% in case of n=0 and n=1, the (biased) variance, STD and SEM are INF
n1 = max(n-1,0);
v  = ssq0./n1;								% variance (unbiased)

% 4th central moment of the mean-removed data
i   = i - repmat(mu, size(i)./size(mu));
cm4 = sumskipnan(i.^4,DIM)./n1;

R = cm4./(v.^2)-3;
NaN/inst/hist2res.m0000664002356700235670000001104211672347406014650 0ustar schloeglschloeglfunction [R]=hist2res(H,fun) % Evaluates Histogram data % [R]=hist2res(H) % % [y]=hist2res(H,fun) % estimates fun-statistic % % fun 'mean' mean % 'std' standard deviation % 'var' variance % 'sem' standard error of the mean % 'rms' root mean square % 'meansq' mean of squares % 'sum' sum % 'sumsq' sum of squares % 'CM#' central moment of order # % 'skewness' skewness % 'kurtosis' excess coefficient (Fisher kurtosis) % % see also: NaN/statistic % % REFERENCES: % [1] C.L. Nikias and A.P. Petropulu "Higher-Order Spectra Analysis" Prentice Hall, 1993. % [2] C.E. Shannon and W. Weaver "The mathematical theory of communication" University of Illinois Press, Urbana 1949 (reprint 1963). % [3] http://www.itl.nist.gov/ % [4] http://mathworld.wolfram.com/ % This program is free software; you can redistribute it and/or % modify it under the terms of the GNU General Public License % as published by the Free Software Foundation; either version 2 % of the License, or (at your option) any later version.
% % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. % $Id: hist2res.m 9387 2011-12-15 10:42:14Z schloegl $ % Copyright (c) 1996-2002,2006 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ if strcmp(H.datatype,'HISTOGRAM'), elseif strcmp(H.datatype,'qc:histo') HDR = H; if isfield(H,'THRESHOLD'), TH = H.THRESHOLD; else TH = repmat([-inf,inf],HDR.NS,1); end; HIS = H.HIS; % remove overflowing samples HIS.N = sumskipnan(HIS.H); for k = 1:size(HIS.H,2); t = HIS.X(:,min(k,size(HIS.X,2))); HIS.H(xor(t<=min(TH(k,:)), t>=max(TH(k,:))),k) = 0; end; Nnew = sumskipnan(HIS.H); R.ratio_lost = 1-Nnew./HIS.N; HIS.N = Nnew; % scale into physical values if H.FLAG.UCAL, %t = HIS.X; %for k=1:length(HDR.InChanSelect), % HIS.X(:,k) = t(:,min(size(t,2),k))*HDR.Calib(k+1,k)+HDR.Calib(1,k); %end; HIS.X = [ones(size(HIS.X,1),1),repmat(HIS.X,1,size(HIS.H,2)./size(HIS.X,2))]*H.Calib; end; H = HIS; else fprintf(2,'ERROR: arg1 is not a histogram\n'); return; end; if nargin<2, fun=[]; end; global FLAG_implicit_unbiased_estimation; %%% check whether FLAG was already defined if ~exist('FLAG_implicit_unbiased_estimation','var'), FLAG_implicit_unbiased_estimation=[]; end; %%% set DEFAULT value of FLAG if isempty(FLAG_implicit_unbiased_estimation), FLAG_implicit_unbiased_estimation=logical(1); end; sz = size(H.H)./size(H.X); R.N = sumskipnan(H.H,1); R.SUM = sumskipnan(H.H.*repmat(H.X,sz),1); R.SSQ = sumskipnan(H.H.*repmat(H.X.*H.X,sz),1); %R.S3P = sumskipnan(H.H.*repmat(H.X.^3,sz),1); % sum of 3rd power R.S4P = sumskipnan(H.H.*repmat(H.X.^4,sz),1); 
% sum of 4th power %R.S5P = sumskipnan(H.H.*repmat(H.X.^5,sz),1); % sum of 5th power R.MEAN = R.SUM./R.N; R.MSQ = R.SSQ./R.N; R.RMS = sqrt(R.MSQ); R.SSQ0 = R.SSQ-R.SUM.*R.MEAN; % sum square of mean removed if FLAG_implicit_unbiased_estimation, n1 = max(R.N-1,0); % in case of n=0 and n=1, the (biased) variance, STD and STE are INF else n1 = R.N; end; R.VAR = R.SSQ0./n1; % variance (unbiased) R.STD = sqrt(R.VAR); % standard deviation R.SEM = sqrt(R.SSQ0./(R.N.*n1)); % standard error of the mean R.SEV = sqrt(n1.*(n1.*R.S4P./R.N+(R.N.^2-2*R.N+3).*(R.SSQ./R.N).^2)./(R.N.^3)); % standard error of the variance R.Coefficient_of_variation = R.STD./R.MEAN; R.CM2 = R.SSQ0./n1; x = repmat(H.X,sz) - repmat(R.MEAN,size(H.X,1),1); R.CM3 = sumskipnan(H.H.*(x.^3),1)./n1; R.CM4 = sumskipnan(H.H.*(x.^4),1)./n1; %R.CM5 = sumskipnan(H.H.*(x.^5),1)./n1; R.SKEWNESS = R.CM3./(R.STD.^3); R.KURTOSIS = R.CM4./(R.VAR.^2)-3; R.MAD = sumskipnan(H.H.*abs(x),1)./R.N; % mean absolute deviation H.PDF = H.H./H.N(ones(size(H.H,1),1),:); status=warning('off'); R.ENTROPY = -sumskipnan(H.PDF.*log2(H.PDF),1); warning(status); R.QUANT = repmat(min(diff(H.X,[],1)),1,size(H.H,2)/size(H.X,2)); R.MAX = max(H.X); R.MIN = min(H.X); R.RANGE = R.MAX-R.MIN; if ~isempty(fun), fun=upper(fun); if strncmp(fun,'CM',2) oo = str2double(fun(3:length(fun))); R = sumskipnan(H.PDF.*(x.^oo),1); else R = getfield(R,fun); end; end; NaN/inst/nanfft.m0000664002356700235670000000402611601145313014346 0ustar schloeglschloeglfunction [Y,N,N2] = nanfft(X,N,DIM); % NANFFT calculates the Fourier-Transform of X for data with missing values. % NANFFT is the same as FFT but X can contain missing values encoded with NaN. % NaN's are skipped, NaN do not result in a NaN output. % % Y = NANFFT(X) % Y = NANFFT(X,N) % Y = NANFFT(X,[],DIM) % % [Y,N] = NANFFT(...) % returns the number of valid samples N % % % WARNING: missing values can introduce aliasing - causing unintended results. 
% Moreover, the behavior of bandpass and highpass filters in case of missing values % is not fully understood, and might contain some pitfalls. % % see also: FFT, XCORR, NANCONV, NANFILTER % $Id$ % Copyright (C) 2005,2011 by Alois Schloegl % This function is part of the NaN-toolbox available at % http://pub.ist.ac.at/~schloegl/matlab/NaN/ and % http://octave.svn.sourceforge.net/viewvc/octave/trunk/octave-forge/extra/NaN/inst/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA warning('NANFFT is experimental. For more details see HELP NANFFT'); NX = isnan(X); X(NX) = 0; if nargin==1, Y = fft(X); N2 = sum(1-NX); % N = fft(NX); elseif nargin==2, Y = fft(X,N); N2 = sum(1-NX); N = fft(NX); elseif nargin==3, Y = fft(X,N,DIM); N2 = sum(1-NX,DIM); % N = fft(NX,N,DIM); end; NaN/inst/train_sc.m0000664002356700235670000013230211714752314014705 0ustar schloeglschloeglfunction [CC]=train_sc(D,classlabel,MODE,W) % Train a (statistical) classifier % % CC = train_sc(D,classlabel) % CC = train_sc(D,classlabel,MODE) % CC = train_sc(D,classlabel,MODE, W) % weighting D(k,:) with weight W(k) (not all classifiers supported weighting) % % CC contains the model parameters of a classifier which can be applied % to test data using test_sc. % R = test_sc(CC,D,...) 
% % D training samples (each row is a sample, each column is a feature) % classlabel labels of each sample, must have the same number of rows as D. % Two different encodings are supported: % {-1,1}-encoding (multiple classes with separate columns for each class) or % 1..M encoding. % So [1;2;3;1;4] is equivalent to % [+1,-1,-1,-1; % [-1,+1,-1,-1; % [-1,-1,+1,-1; % [+1,-1,-1,-1] % [-1,-1,-1,+1] % Note, samples with classlabel=0 are ignored. % % The following classifier types are supported MODE.TYPE % 'MDA' mahalanobis distance based classifier [1] % 'MD2' mahalanobis distance based classifier [1] % 'MD3' mahalanobis distance based classifier [1] % 'GRB' Gaussian radial basis function [1] % 'QDA' quadratic discriminant analysis [1] % 'LD2' linear discriminant analysis (see LDBC2) [1] % MODE.hyperparameter.gamma: regularization parameter [default 0] % 'LD3', 'FDA', 'LDA', 'FLDA' % linear discriminant analysis (see LDBC3) [1] % MODE.hyperparameter.gamma: regularization parameter [default 0] % 'LD4' linear discriminant analysis (see LDBC4) [1] % MODE.hyperparameter.gamma: regularization parameter [default 0] % 'LD5' another LDA (motivated by CSP) % MODE.hyperparameter.gamma: regularization parameter [default 0] % 'RDA' regularized discriminant analysis [7] % MODE.hyperparameter.gamma: regularization parameter % MODE.hyperparameter.lambda = % gamma = 0, lambda = 0 : MDA % gamma = 0, lambda = 1 : LDA [default] % Hint: hyperparameter are used only in test_sc.m, testing different % the hyperparameters do not need repetitive calls to train_sc, % it is sufficient to modify CC.hyperparameter before calling test_sc. 
% 'GDBC' general distance based classifier [1] % '' statistical classifier, requires Mode argument in TEST_SC % '###/DELETION' if the data contains missing values (encoded as NaNs), % a row-wise or column-wise deletion (depending on which method % removes less data values) is applied; % '###/GSVD' GSVD and statistical classifier [2,3], % '###/sparse' sparse [5] % '###' must be 'LDA' or any other classifier % 'PLS' (linear) partial least squares regression % 'REG' regression analysis; % 'WienerHopf' Wiener-Hopf equation % 'NBC' Naive Bayesian Classifier [6] % 'aNBC' Augmented Naive Bayesian Classifier [6] % 'NBPW' Naive Bayesian Parzen Window [9] % % 'PLA' Perceptron Learning Algorithm [11] % MODE.hyperparameter.alpha = alpha [default: 1] % w = w + alpha * e'*x % 'LMS', 'AdaLine' Least mean squares, adaptive line element, Widrow-Hoff, delta rule % MODE.hyperparameter.alpha = alpha [default: 1] % 'Winnow2' Winnow2 algorithm [12] % % 'PSVM' Proximal SVM [8] % MODE.hyperparameter.nu (default: 1.0) % 'LPM' Linear Programming Machine % uses and requires train_LPM of the iLog CPLEX optimizer % MODE.hyperparameter.c_value = % 'CSP' CommonSpatialPattern is very experimental and just a hack % uses a smoothing window of 50 samples. 
% 'SVM','SVM1r' support vector machines, one-vs-rest % MODE.hyperparameter.c_value = % 'SVM11' support vector machines, one-vs-one + voting % MODE.hyperparameter.c_value = % 'RBF' Support Vector Machines with RBF Kernel % MODE.hyperparameter.c_value = % MODE.hyperparameter.gamma = % 'SVM:LIB' libSVM [default SVM algorithm) % 'SVM:bioinfo' uses and requires svmtrain from the bioinfo toolbox % 'SVM:OSU' uses and requires mexSVMTrain from the OSU-SVM toolbox % 'SVM:LOO' uses and requires svcm_train from the LOO-SVM toolbox % 'SVM:Gunn' uses and requires svc-functios from the Gunn-SVM toolbox % 'SVM:KM' uses and requires svmclass-function from the KM-SVM toolbox % 'SVM:LINz' LibLinear [10] (requires train.mex from LibLinear somewhere in the path) % z=0 (default) LibLinear with -- L2-regularized logistic regression % z=1 LibLinear with -- L2-loss support vector machines (dual) % z=2 LibLinear with -- L2-loss support vector machines (primal) % z=3 LibLinear with -- L1-loss support vector machines (dual) % 'SVM:LIN4' LibLinear with -- multi-class support vector machines by Crammer and Singer % 'DT' decision tree - not implemented yet. % % {'REG','MDA','MD2','QDA','QDA2','LD2','LD3','LD4','LD5','LD6','NBC','aNBC','WienerHopf','LDA/GSVD','MDA/GSVD', 'LDA/sparse','MDA/sparse', 'PLA', 'LMS','LDA/DELETION','MDA/DELETION','NBC/DELETION','RDA/DELETION','REG/DELETION','RDA','GDBC','SVM','RBF','PSVM','SVM11','SVM:LIN4','SVM:LIN0','SVM:LIN1','SVM:LIN2','SVM:LIN3','WINNOW', 'DT'}; % % CC contains the model parameters of a classifier. Some time ago, % CC was a statistical classifier containing the mean % and the covariance of the data of each class (encoded in the % so-called "extended covariance matrices". Nowadays, also other % classifiers are supported. % % see also: TEST_SC, COVM, ROW_COL_DELETION % % References: % [1] R. Duda, P. Hart, and D. Stork, Pattern Classification, second ed. % John Wiley & Sons, 2001. 
% [2] Peg Howland and Haesun Park, % Generalizing Discriminant Analysis Using the Generalized Singular Value Decomposition % IEEE Transactions on Pattern Analysis and Machine Intelligence, 26(8), 2004. % dx.doi.org/10.1109/TPAMI.2004.46 % [3] http://www-static.cc.gatech.edu/~kihwan23/face_recog_gsvd.htm % [4] Jieping Ye, Ravi Janardan, Cheong Hee Park, Haesun Park % A new optimization criterion for generalized discriminant analysis on undersampled problems. % The Third IEEE International Conference on Data Mining, Melbourne, Florida, USA % November 19 - 22, 2003 % [5] J.D. Tebbens and P. Schlesinger (2006), % Improving Implementation of Linear Discriminant Analysis for the Small Sample Size Problem % Computational Statistics & Data Analysis, vol 52(1): 423-437, 2007 % http://www.cs.cas.cz/mweb/download/publi/JdtSchl2006.pdf % [6] H. Zhang, The optimality of Naive Bayes, % http://www.cs.unb.ca/profs/hzhang/publications/FLAIRS04ZhangH.pdf % [7] J.H. Friedman. Regularized discriminant analysis. % Journal of the American Statistical Association, 84:165–175, 1989. % [8] G. Fung and O.L. Mangasarian, Proximal Support Vector Machine Classifiers, KDD 2001. % Eds. F. Provost and R. Srikant, Proc. KDD-2001: Knowledge Discovery and Data Mining, August 26-29, 2001, San Francisco, CA. % p. 77-86. % [9] Kai Keng Ang, Zhang Yang Chin, Haihong Zhang, Cuntai Guan. % Filter Bank Common Spatial Pattern (FBCSP) in Brain-Computer Interface. % IEEE International Joint Conference on Neural Networks, 2008. IJCNN 2008. (IEEE World Congress on Computational Intelligence). % 1-8 June 2008 Page(s):2390 - 2397 % [10] R.-E. Fan, K.-W. Chang, C.-J. Hsieh, X.-R. Wang, and C.-J. Lin. % LIBLINEAR: A Library for Large Linear Classification, Journal of Machine Learning Research 9(2008), 1871-1874. % Software available at http://www.csie.ntu.edu.tw/~cjlin/liblinear % [11] http://en.wikipedia.org/wiki/Perceptron#Learning_algorithm % [12] Littlestone, N. 
(1988) % "Learning Quickly When Irrelevant Attributes Abound: A New Linear-threshold Algorithm" % Machine Learning 285-318(2) % http://en.wikipedia.org/wiki/Winnow_(algorithm) % $Id: train_sc.m 9601 2012-02-09 14:14:36Z schloegl $ % Copyright (C) 2005,2006,2007,2008,2009,2010 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or % modify it under the terms of the GNU General Public License % as published by the Free Software Foundation; either version 3 % of the License, or (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. if nargin<2, error('insufficient input arguments\n\tusage: train_sc(D,C,...)\n'); end if nargin<3, MODE = 'LDA'; end if nargin<4, W = []; end if ischar(MODE) tmp = MODE; clear MODE; MODE.TYPE = tmp; elseif ~isfield(MODE,'TYPE') MODE.TYPE=''; end if isfield(MODE,'hyperparameters') && ~isfield(MODE,'hyperparameter'), %% for backwards compatibility, this might become obsolete warning('MODE.hyperparameters are used, You should use MODE.hyperparameter instead!!!'); MODE.hyperparameter = MODE.hyperparameters; end sz = size(D); if sz(1)~=size(classlabel,1), error('length of data and classlabel does not fit'); end % remove all NaN's if 1, % several classifier can deal with NaN's, there is no need to remove them. elseif isempty(W) %% TODO: some classifiers can deal with NaN's in D. Test whether this can be relaxed. 
%ix = any(isnan([classlabel]),2); ix = any(isnan([D,classlabel]),2); D(ix,:) = []; classlabel(ix,:)=[]; W = []; else %ix = any(isnan([classlabel]),2); ix = any(isnan([D,classlabel]),2); D(ix,:)=[]; classlabel(ix,:)=[]; W(ix,:)=[]; warning('support for weighting of samples is still experimental'); end sz = size(D); if sz(1)~=length(classlabel), error('length of data and classlabel does not fit'); end if ~isfield(MODE,'hyperparameter') MODE.hyperparameter = []; end if 0, ; elseif ~isempty(strfind(lower(MODE.TYPE),'/delet')) POS1 = find(MODE.TYPE=='/'); [rix,cix] = row_col_deletion(D); if ~isempty(W), W=W(rix); end CC = train_sc(D(rix,cix),classlabel(rix,:),MODE.TYPE(1:POS1(1)-1),W); CC.G = sparse(cix, 1:length(cix), 1, size(D,2), length(cix)); if isfield(CC,'weights') W = [CC.weights(1,:); CC.weights(2:end,:)]; CC.weights = sparse(size(D,2)+1, size(W,2)); CC.weights([1,cix+1],:) = W; CC.datatype = ['classifier:statistical:',lower(MODE.TYPE)]; else CC.datatype = [CC.datatype,'/delet']; end elseif ~isempty(strfind(lower(MODE.TYPE),'nbpw')) error('NBPW not implemented yet') %%%% Naive Bayesian Parzen Window Classifier. [classlabel,CC.Labels] = CL1M(classlabel); for k = 1:length(CC.Labels), [d,CC.MEAN(k,:)] = center(D(classlabel==CC.Labels(k),:),1); [CC.VAR(k,:),CC.N(k,:)] = sumskipnan(d.^2,1); h2_opt = (4./(3*CC.N(k,:))).^(2/5).*CC.VAR(k,:); %%% TODO end elseif ~isempty(strfind(lower(MODE.TYPE),'nbc')) %%%% Naive Bayesian Classifier if ~isempty(strfind(lower(MODE.TYPE),'anbc')) %%%% Augmented Naive Bayesian classifier. 
[CC.V,L] = eig(covm(D,'M',W)); D = D*CC.V; else CC.V = eye(size(D,2)); end [classlabel,CC.Labels] = CL1M(classlabel); for k = 1:length(CC.Labels), ix = classlabel==CC.Labels(k); %% [d,CC.MEAN(k,:)] = center(D(ix,:),1); if ~isempty(W) [s,n] = sumskipnan(D(ix,:),1,W(ix)); CC.MEAN(k,:) = s./n; d = D(ix,:) - CC.MEAN(repmat(k,sum(ix),1),:); [CC.VAR(k,:),CC.N(k,:)] = sumskipnan(d.^2,1,W(ix)); else [s,n] = sumskipnan(D(ix,:),1); CC.MEAN(k,:) = s./n; d = D(ix,:) - CC.MEAN(repmat(k,sum(ix),1),:); [CC.VAR(k,:),CC.N(k,:)] = sumskipnan(d.^2,1); end end CC.VAR = CC.VAR./max(CC.N-1,0); CC.datatype = ['classifier:',lower(MODE.TYPE)]; elseif ~isempty(strfind(lower(MODE.TYPE),'lpm')) if ~isempty(W) error('Error TRAIN_SC: Classifier (%s) does not support weighted samples.',MODE.TYPE); end % linear programming machine % CPLEX optimizer: ILOG solver, ilog cplex 6.5 reference manual http://www.ilog.com MODE.TYPE = 'LPM'; if ~isfield(MODE.hyperparameter,'c_value') MODE.hyperparameter.c_value = 1; end [classlabel,CC.Labels] = CL1M(classlabel); M = length(CC.Labels); if M==2, M=1; end % For a 2-class problem, only 1 Discriminant is needed for k = 1:M, %LPM = train_LPM(D,(classlabel==CC.Labels(k)),'C',MODE.hyperparameter.c_value); LPM = train_LPM(D',(classlabel'==CC.Labels(k))); CC.weights(:,k) = [-LPM.b; LPM.w(:)]; end CC.hyperparameter.c_value = MODE.hyperparameter.c_value; CC.datatype = ['classifier:',lower(MODE.TYPE)]; elseif ~isempty(strfind(lower(MODE.TYPE),'pla')), % Perceptron Learning Algorithm [rix,cix] = row_col_deletion(D); [CL101,CC.Labels] = cl101(classlabel); M = size(CL101,2); weights = sparse(length(cix)+1,M); %ix = randperm(size(D,1)); %% randomize samples ??? 
% Perceptron update loop. Branch on whether sample weights W were supplied, the same
% test the adaline/LMS branch uses. The former test (~isfield(...,'alpha')) made the
% inner isfield check unreachable and sent unweighted training with a user-set alpha
% into the weighted path, where W(k) indexes an empty W.
if isempty(W)
	% unweighted: one learning rate for every sample
	if isfield(MODE.hyperparameter,'alpha')
		alpha = MODE.hyperparameter.alpha;
	else
		alpha = 1;
	end
	for k = rix(:)',
		%e = ((classlabel(k)==(1:M))-.5) - sign([1, D(k,cix)] * weights)/2;
		e = CL101(k,:) - sign([1, D(k,cix)] * weights);
		weights = weights + alpha * [1,D(k,cix)]' * e ;
	end
else %if ~isempty(W)
	% weighted: W(k) is the per-sample step size, scaled by alpha when given
	if isfield(MODE.hyperparameter,'alpha')
		W = W*MODE.hyperparameter.alpha;
	end
	for k = rix(:)',
		%e = ((classlabel(k)==(1:M))-.5) - sign([1, D(k,cix)] * weights)/2;
		e = CL101(k,:) - sign([1, D(k,cix)] * weights);
		weights = weights + W(k) * [1,D(k,cix)]' * e ;
	end
end
% scatter the trained weights back to full feature size (rows of columns not in cix stay zero)
CC.weights = sparse(size(D,2)+1,M);
CC.weights([1,cix+1],:) = weights;
CC.datatype = ['classifier:',lower(MODE.TYPE)];

elseif ~isempty(strfind(lower(MODE.TYPE),'adaline')) || ~isempty(strfind(lower(MODE.TYPE),'lms')),
% adaptive linear elemente, least mean squares, delta rule, Widrow-Hoff,
[rix,cix] = row_col_deletion(D);
[CL101,CC.Labels] = cl101(classlabel);
M = size(CL101,2);
weights = sparse(length(cix)+1,M);
%ix = randperm(size(D,1)); %% randomize samples ???
if isempty(W) if isfield(MODE.hyperparameter,'alpha') alpha = MODE.hyperparameter.alpha; else alpha = 1; end for k = rix(:)', %e = (classlabel(k)==(1:M)) - [1, D(k,cix)] * weights; e = CL101(k,:) - sign([1, D(k,cix)] * weights); weights = weights + alpha * [1,D(k,cix)]' * e ; end else %if ~isempty(W) if isfield(MODE.hyperparameter,'alpha') W = W*MODE.hyperparameter.alpha; end for k = rix(:)', %e = (classlabel(k)==(1:M)) - [1, D(k,cix)] * weights; e = CL101(k,:) - sign([1, D(k,cix)] * weights); weights = weights + W(k) * [1,D(k,cix)]' * e ; end end CC.weights = sparse(size(D,2)+1,M); CC.weights([1,cix+1],:) = weights; CC.datatype = ['classifier:',lower(MODE.TYPE)]; elseif ~isempty(strfind(lower(MODE.TYPE),'winnow')) % winnow algorithm if ~isempty(W) error('Classifier (%s) does not support weighted samples.',MODE.TYPE); end [rix,cix] = row_col_deletion(D); [CL101,CC.Labels] = cl101(classlabel); M = size(CL101,2); weights = ones(length(cix),M); theta = size(D,2)/2; for k = rix(:)', e = CL101(k,:) - sign(D(k,cix) * weights - theta); weights = weights.* 2.^(D(k,cix)' * e); end CC.weights = sparse(size(D,2)+1,M); CC.weights(cix+1,:) = weights; CC.datatype = ['classifier:',lower(MODE.TYPE)]; elseif ~isempty(strfind(lower(MODE.TYPE),'pls')) || ~isempty(strfind(lower(MODE.TYPE),'reg')) % 4th version: support for weighted samples - work well with unequally distributed data: % regression analysis, can handle sparse data, too. 
if nargin<4, W = []; end [rix, cix] = row_col_deletion(D); wD = [ones(length(rix),1),D(rix,cix)]; if ~isempty(W) %% wD = diag(W)*wD W = W(:); for k=1:size(wD,2) wD(:,k) = W(rix).*wD(:,k); end end [CL101, CC.Labels] = cl101(classlabel(rix,:)); M = size(CL101,2); CC.weights = sparse(sz(2)+1,M); %[rix, cix] = row_col_deletion(wD); [q,r] = qr(wD,0); if isempty(W) CC.weights([1,cix+1],:) = r\(q'*CL101); else CC.weights([1,cix+1],:) = r\(q'*(W(rix,ones(1,M)).*CL101)); end %for k = 1:M, % CC.weights(cix,k) = r\(q'*(W.*CL101(rix,k))); %end CC.datatype = ['classifier:statistical:',lower(MODE.TYPE)]; elseif ~isempty(strfind(MODE.TYPE,'WienerHopf')) % Q: equivalent to LDA % equivalent to Regression, except regression can not deal with NaN's [CL101,CC.Labels] = cl101(classlabel); M = size(CL101,2); CC.weights = sparse(size(D,2)+1,M); cc = covm(D,'E',W); %c1 = classlabel(~isnan(classlabel)); %c2 = ones(sum(~isnan(classlabel)),M); %for k = 1:M, % c2(:,k) = c1==CC.Labels(k); %end %CC.weights = cc\covm([ones(size(c2,1),1),D(~isnan(classlabel),:)],2*real(c2)-1,'M',W); CC.weights = cc\covm([ones(size(D,1),1),D],CL101,'M',W); CC.datatype = ['classifier:statistical:',lower(MODE.TYPE)]; elseif ~isempty(strfind(lower(MODE.TYPE),'/gsvd')) if ~isempty(W) error('Classifier (%s) does not support weighted samples.',MODE.TYPE); end % [2] Peg Howland and Haesun Park, 2004 % Generalizing Discriminant Analysis Using the Generalized Singular Value Decomposition % IEEE Transactions on Pattern Analysis and Machine Intelligence, 26(8), 2004. 
% dx.doi.org/10.1109/TPAMI.2004.46 % [3] http://www-static.cc.gatech.edu/~kihwan23/face_recog_gsvd.htm [classlabel,CC.Labels] = CL1M(classlabel); [rix,cix] = row_col_deletion(D); Hw = zeros(length(rix)+length(CC.Labels), length(cix)); Hb = []; m0 = mean(D(rix,cix)); K = length(CC.Labels); N = zeros(1,K); for k = 1:K, ix = find(classlabel(rix)==CC.Labels(k)); N(k) = length(ix); [Hw(ix,:), mu] = center(D(rix(ix),cix)); %Hb(k,:) = sqrt(N(k))*(mu(k,:)-m0); Hw(length(rix)+k,:) = sqrt(N(k))*(mu-m0); % Hb(k,:) end try [P,R,Q] = svd(Hw,'econ'); catch % needed because SVD(..,'econ') not supported in Matlab 6.x [P,R,Q] = svd(Hw,0); end t = rank(R); clear Hw Hb mu; %[size(D);size(P);size(Q);size(R)] R = R(1:t,1:t); %P = P(1:size(D,1),1:t); %Q = Q(1:t,:); [U,E,W] = svd(P(1:length(rix),1:t),0); %[size(U);size(E);size(W)] clear U E P; %[size(Q);size(R);size(W)] %G = Q(1:t,:)'*[R\W']; G = Q(:,1:t)*(R\W'); % this works as well and needs only 'econ'-SVD %G = G(:,1:t); % not needed % do not use this, gives very bad results for Medline database %G = G(:,1:K); this seems to be a typo in [2] and [3]. CC = train_sc(D(:,cix)*G,classlabel,MODE.TYPE(1:find(MODE.TYPE=='/')-1)); CC.G = sparse(size(D,2),size(G,2)); CC.G(cix,:) = G; if isfield(CC,'weights') CC.weights = sparse([CC.weights(1,:); CC.G*CC.weights(2:end,:)]); CC.datatype = ['classifier:statistical:', lower(MODE.TYPE)]; else CC.datatype = [CC.datatype,'/gsvd']; end elseif ~isempty(strfind(lower(MODE.TYPE),'sparse')) if ~isempty(W) error('Classifier (%s) does not support weighted samples.',MODE.TYPE); end % [5] J.D. 
Tebbens and P.Schlesinger (2006), % Improving Implementation of Linear Discriminant Analysis for the Small Sample Size Problem % http://www.cs.cas.cz/mweb/download/publi/JdtSchl2006.pdf [classlabel,CC.Labels] = CL1M(classlabel); [rix,cix] = row_col_deletion(D); warning('sparse LDA is sensitive to linear transformations') M = length(CC.Labels); G = sparse([],[],[],length(rix),M,length(rix)); for k = 1:M, G(classlabel(rix)==CC.Labels(k),k) = 1; end tol = 1e-10; G = train_lda_sparse(D(rix,cix),G,1,tol); CC.datatype = 'classifier:slda'; POS1 = find(MODE.TYPE=='/'); %G = v(:,1:size(G.trafo,2)).*G.trafo; %CC.weights = s * CC.weights(2:end,:) + sparse(1,1:M,CC.weights(1,:),sz(2)+1,M); CC = train_sc(D(rix,cix)*G.trafo,classlabel(rix),MODE.TYPE(1:POS1(1)-1)); CC.G = sparse(size(D,2),size(G.trafo,2)); CC.G(cix,:) = G.trafo; if isfield(CC,'weights') CC.weights = sparse([CC.weights(1,:); CC.G*CC.weights(2:end,:)]); CC.datatype = ['classifier:statistical:',lower(MODE.TYPE)]; else CC.datatype = [CC.datatype,'/sparse']; end elseif ~isempty(strfind(lower(MODE.TYPE),'rbf')) if ~isempty(W) error('Classifier (%s) does not support weighted samples.',MODE.TYPE); end % Martin Hieden's RBF-SVM if exist('svmpredict_mex','file')==3, MODE.TYPE = 'SVM:LIB:RBF'; else error('No SVM training algorithm available. 
Install LibSVM for Matlab.\n'); end CC.options = '-t 2 -q'; %use RBF kernel, set C, set gamma if isfield(MODE.hyperparameter,'gamma') CC.options = sprintf('%s -c %g', CC.options, MODE.hyperparameter.c_value); % set C end if isfield(MODE.hyperparameter,'c_value') CC.options = sprintf('%s -g %g', CC.options, MODE.hyperparameter.gamma); % set C end % pre-whitening [D,r,m]=zscore(D,1); CC.prewhite = sparse(2:sz(2)+1,1:sz(2),r,sz(2)+1,sz(2),2*sz(2)); CC.prewhite(1,:) = -m.*r; [classlabel,CC.Labels] = CL1M(classlabel); CC.model = svmtrain_mex(classlabel, D, CC.options); % Call the training mex File CC.datatype = ['classifier:',lower(MODE.TYPE)]; elseif ~isempty(strfind(lower(MODE.TYPE),'svm11')) if ~isempty(W) error('Classifier (%s) does not support weighted samples.',MODE.TYPE); end % 1-versus-1 scheme if ~isfield(MODE.hyperparameter,'c_value') MODE.hyperparameter.c_value = 1; end CC.options=sprintf('-c %g -t 0 -q',MODE.hyperparameter.c_value); %use linear kernel, set C CC.hyperparameter.c_value = MODE.hyperparameter.c_value; % pre-whitening [D,r,m]=zscore(D,1); CC.prewhite = sparse(2:sz(2)+1,1:sz(2),r,sz(2)+1,sz(2),2*sz(2)); CC.prewhite(1,:) = -m.*r; [classlabel,CC.Labels] = CL1M(classlabel); CC.model = svmtrain_mex(classlabel, D, CC.options); % Call the training mex File FUN = 'SVM:LIB:1vs1'; CC.datatype = ['classifier:',lower(FUN)]; elseif ~isempty(strfind(lower(MODE.TYPE),'psvm')) if ~isempty(W) %%% error('Classifier (%s) does not support weighted samples.',MODE.TYPE); warning('Classifier (%s) in combination with weighted samples is not tested.',MODE.TYPE); end if ~isfield(MODE,'hyperparameter') nu = 1; elseif isfield(MODE.hyperparameter,'nu') nu = MODE.hyperparameter.nu; else nu = 1; end [m,n] = size(D); [CL101,CC.Labels] = cl101(classlabel); CC.weights = sparse(n+1,size(CL101,2)); M = size(CL101,2); for k = 1:M, d = sparse(1:m,1:m,CL101(:,k)); H = d * [ones(m,1),D]; %%% r = sum(H,1)'; r = sumskipnan(H,1,W)'; %%% r = (speye(n+1)/nu + H' * H)\r; %solve 
(I/nu+H’*H)r=H’*e [HTH, nn] = covm(H,H,'M',W); r = (speye(n+1)/nu + HTH)\r; %solve (I/nu+H’*H)r=H’*e u = nu*(1-(H*r)); %%% CC.weights(:,k) = u'*H; [c,nn] = covm(u,H,'M',W); CC.weights(:,k) = c'; end CC.hyperparameter.nu = nu; CC.datatype = ['classifier:',lower(MODE.TYPE)]; elseif ~isempty(strfind(lower(MODE.TYPE),'svm:lin4')) if ~isfield(MODE.hyperparameter,'c_value') MODE.hyperparameter.c_value = 1; end [classlabel,CC.Labels] = CL1M(classlabel); M = length(CC.Labels); CC.weights = sparse(size(D,2)+1,M); [rix,cix] = row_col_deletion(D); % pre-whitening [D,r,m]=zscore(D(rix,cix),1); sz2 = length(cix); s = sparse(2:sz2+1,1:sz2,r,sz2+1,sz2,2*sz2); s(1,:) = -m.*r; CC.options = sprintf('-s 4 -B 1 -c %f -q', MODE.hyperparameter.c_value); % C-SVC, C=1, linear kernel, degree = 1, model = train(W, classlabel, sparse(D), CC.options); % C-SVC, C=1, linear kernel, degree = 1, weights = model.w([end,1:end-1],:)'; CC.weights([1,cix+1],:) = s * weights(2:end,:) + sparse(1,1:M,weights(1,:),sz2+1,M); % include pre-whitening transformation CC.weights([1,cix+1],:) = s * CC.weights(cix+1,:) + sparse(1,1:M,CC.weights(1,:),sz2+1,M); % include pre-whitening transformation CC.hyperparameter.c_value = MODE.hyperparameter.c_value; CC.datatype = ['classifier:',lower(MODE.TYPE)]; elseif ~isempty(strfind(lower(MODE.TYPE),'svm')) if ~isfield(MODE.hyperparameter,'c_value') MODE.hyperparameter.c_value = 1; end if any(MODE.TYPE==':'), % nothing to be done elseif exist('train','file')==3, MODE.TYPE = 'SVM:LIN'; %% liblinear elseif exist('svmtrain_mex','file')==3, MODE.TYPE = 'SVM:LIB'; elseif (exist('svmtrain','file')==3), MODE.TYPE = 'SVM:LIB'; fprintf(1,'You need to rename %s to svmtrain_mex.mex !! 
\n Press any key to continue !!!\n',which('svmtrain.mex')); elseif exist('svmtrain','file')==2, MODE.TYPE = 'SVM:bioinfo'; elseif exist('mexSVMTrain','file')==3, MODE.TYPE = 'SVM:OSU'; elseif exist('svcm_train','file')==2, MODE.TYPE = 'SVM:LOO'; elseif exist('svmclass','file')==2, MODE.TYPE = 'SVM:KM'; elseif exist('svc','file')==2, MODE.TYPE = 'SVM:Gunn'; else error('No SVM training algorithm available. Install OSV-SVM, or LOO-SVM, or libSVM for Matlab.\n'); end %%CC = train_svm(D,classlabel,MODE); [CL101,CC.Labels] = cl101(classlabel); M = size(CL101,2); [rix,cix] = row_col_deletion(D); CC.weights = sparse(sz(2)+1, M); % pre-whitening [D,r,m]=zscore(D(rix,cix),1); sz2 = length(cix); s = sparse(2:sz2+1,1:sz2,r,sz2+1,sz2,2*sz2); s(1,:) = -m.*r; for k = 1:M, cl = CL101(rix,k); if strncmp(MODE.TYPE, 'SVM:LIN',7); if isfield(MODE,'options') CC.options = MODE.options; else t = 0; if length(MODE.TYPE)>7, t=MODE.TYPE(8)-'0'; end if (t<0 || t>6) t=0; end CC.options = sprintf('-s %i -B 1 -c %f -q',t, MODE.hyperparameter.c_value); % C-SVC, C=1, linear kernel, degree = 1, end model = train(W, cl, sparse(D), CC.options); % C-SVC, C=1, linear kernel, degree = 1, w = -model.w'; Bias = -model.bias; w = -model.w(:,1:end-1)'; Bias = -model.w(:,end)'; elseif strcmp(MODE.TYPE, 'SVM:LIB'); %% tested with libsvm-mat-2.9-1 if isfield(MODE,'options') CC.options = MODE.options; else CC.options = sprintf('-s 0 -c %f -t 0 -d 1 -q', MODE.hyperparameter.c_value); % C-SVC, C=1, linear kernel, degree = 1, end model = svmtrain_mex(cl, D, CC.options); % C-SVC, C=1, linear kernel, degree = 1, w = cl(1) * model.SVs' * model.sv_coef; %Calculate decision hyperplane weight vector % ensure correct sign of weight vector and Bias according to class label Bias = model.rho * cl(1); elseif strcmp(MODE.TYPE, 'SVM:bioinfo'); % SVM classifier from bioinformatics toolbox. 
% Settings suggested by Ian Daly, 2011-06-06 options = optimset('Display','iter','maxiter',20000, 'largescale','off'); CC.SVMstruct = svmtrain(D, cl, 'AUTOSCALE', 0, 'quadprog_opts', options, 'Method', 'LS', 'kernel_function', 'polynomial'); Bias = -CC.SVMstruct.Bias; w = -CC.SVMstruct.Alpha'*CC.SVMstruct.SupportVectors; elseif strcmp(MODE.TYPE, 'SVM:OSU'); [AlphaY, SVs, Bias] = mexSVMTrain(D', cl', [0 1 1 1 MODE.hyperparameter.c_value]); % Linear Kernel, C=1; degree=1, c-SVM w = -SVs * AlphaY'*cl(1); %Calculate decision hyperplane weight vector % ensure correct sign of weight vector and Bias according to class label Bias = -Bias * cl(1); elseif strcmp(MODE.TYPE, 'SVM:LOO'); [a, Bias, g, inds] = svcm_train(D, cl, MODE.hyperparameter.c_value); % C = 1; w = D(inds,:)' * (a(inds).*cl(inds)) ; elseif strcmp(MODE.TYPE, 'SVM:Gunn'); [nsv, alpha, Bias,svi] = svc(D, cl, 1, MODE.hyperparameter.c_value); % linear kernel, C = 1; w = D(svi,:)' * alpha(svi) * cl(1); Bias = mean(D*w); elseif strcmp(MODE.TYPE, 'SVM:KM'); [xsup,w1,Bias,inds] = svmclass(D, cl, MODE.hyperparameter.c_value, 1, 'poly', 1); % C = 1; w = -D(inds,:)' * w1; else fprintf(2,'Error TRAIN_SVM: no SVM training algorithm available\n'); return; end CC.weights(1,k) = -Bias; CC.weights(cix+1,k) = w; end CC.weights([1,cix+1],:) = s * CC.weights(cix+1,:) + sparse(1,1:M,CC.weights(1,:),sz2+1,M); % include pre-whitening transformation CC.hyperparameter.c_value = MODE.hyperparameter.c_value; CC.datatype = ['classifier:',lower(MODE.TYPE)]; elseif ~isempty(strfind(lower(MODE.TYPE),'csp')) CC.datatype = ['classifier:',lower(MODE.TYPE)]; [classlabel,CC.Labels] = CL1M(classlabel); CC.MD = repmat(NaN,[sz(2)+[1,1],length(CC.Labels)]); CC.NN = CC.MD; for k = 1:length(CC.Labels), %% [CC.MD(k,:,:),CC.NN(k,:,:)] = covm(D(classlabel==CC.Labels(k),:),'E'); ix = classlabel==CC.Labels(k); if isempty(W) [CC.MD(:,:,k),CC.NN(:,:,k)] = covm(D(ix,:), 'E'); else [CC.MD(:,:,k),CC.NN(:,:,k)] = covm(D(ix,:), 'E', W(ix)); end end ECM = 
CC.MD./CC.NN; W = csp(ECM,'CSP3'); %%% ### This is a hack ### CC.FiltA = 50; CC.FiltB = ones(CC.FiltA,1); d = filtfilt(CC.FiltB,CC.FiltA,(D*W).^2); CC.csp_w = W; CC.CSP = train_sc(log(d),classlabel); else % Linear and Quadratic statistical classifiers CC.datatype = ['classifier:statistical:',lower(MODE.TYPE)]; [classlabel,CC.Labels] = CL1M(classlabel); CC.MD = repmat(NaN,[sz(2)+[1,1],length(CC.Labels)]); CC.NN = CC.MD; for k = 1:length(CC.Labels), ix = classlabel==CC.Labels(k); if isempty(W) [CC.MD(:,:,k),CC.NN(:,:,k)] = covm(D(ix,:), 'E'); else [CC.MD(:,:,k),CC.NN(:,:,k)] = covm(D(ix,:), 'E', W(ix)); end end ECM = CC.MD./CC.NN; NC = size(CC.MD); if strncmpi(MODE.TYPE,'LD',2) || strncmpi(MODE.TYPE,'FDA',3) || strncmpi(MODE.TYPE,'FLDA',3), %if NC(1)==2, NC(1)=1; end % linear two class problem needs only one discriminant CC.weights = repmat(NaN,NC(2),NC(3)); % memory allocation type = MODE.TYPE(3)-'0'; ECM0 = squeeze(sum(ECM,3)); %decompose ECM for k = 1:NC(3); ix = [1:k-1,k+1:NC(3)]; dM = CC.MD(:,1,k)./CC.NN(:,1,k) - sum(CC.MD(:,1,ix),3)./sum(CC.NN(:,1,ix),3); switch (type) case 2 % LD2 ecm0 = (sum(ECM(:,:,ix),3)/(NC(3)-1) + ECM(:,:,k)); case 4 % LD4 ecm0 = 2*(sum(ECM(:,:,ix),3) + ECM(:,:,k))/NC(3); % ecm0 = sum(CC.MD,3)./sum(CC.NN,3); case 5 % LD5 ecm0 = ECM(:,:,k); case 6 % LD6 ecm0 = sum(CC.MD(:,:,ix),3)./sum(CC.NN(:,:,ix),3); otherwise % LD3, LDA, FDA ecm0 = ECM0; end if isfield(MODE.hyperparameter,'gamma') ecm0 = ecm0 + mean(diag(ecm0))*eye(size(ecm0))*MODE.hyperparameter.gamma; end CC.weights(:,k) = ecm0\dM; end %CC.weights = sparse(CC.weights); elseif strcmpi(MODE.TYPE,'RDA'); if isfield(MODE,'hyperparameter') CC.hyperparameter = MODE.hyperparameter; end % default values if ~isfield(CC.hyperparameter,'gamma') CC.hyperparameter.gamma = 0; end if ~isfield(CC.hyperparameter,'lambda') CC.hyperparameter.lambda = 1; end else ECM0 = sum(ECM,3); nn = ECM0(1,1,1); % number of samples in training set for class k XC = squeeze(ECM0(:,:,1))/nn; % normalize correlation 
matrix M = XC(1,2:NC(2)); % mean S = XC(2:NC(2),2:NC(2)) - M'*M;% covariance matrix try [v,d]=eig(S); U0 = v(diag(d)==0,:); CC.iS2 = U0*U0'; end %M = M/nn; S=S/(nn-1); ICOV0 = inv(S); CC.iS0 = ICOV0; % ICOV1 = zeros(size(S)); for k = 1:NC(3), %[M,sd,S,xc,N] = decovm(ECM{k}); %decompose ECM %c = size(ECM,2); nn = ECM(1,1,k);% number of samples in training set for class k XC = squeeze(ECM(:,:,k))/nn;% normalize correlation matrix M = XC(1,2:NC(2));% mean S = XC(2:NC(2),2:NC(2)) - M'*M;% covariance matrix %M = M/nn; S=S/(nn-1); %ICOV(1) = ICOV(1) + (XC(2:NC(2),2:NC(2)) - )/nn CC.M{k} = M; CC.IR{k} = [-M;eye(NC(2)-1)]*inv(S)*[-M',eye(NC(2)-1)]; % inverse correlation matrix extended by mean CC.IR0{k} = [-M;eye(NC(2)-1)]*ICOV0*[-M',eye(NC(2)-1)]; % inverse correlation matrix extended by mean d = NC(2)-1; if exist('OCTAVE_VERSION','builtin') S = full(S); end CC.logSF(k) = log(nn) - d/2*log(2*pi) - det(S)/2; CC.logSF2(k) = -2*log(nn/sum(ECM(:,1,1))); CC.logSF3(k) = d*log(2*pi) + log(det(S)); CC.logSF4(k) = log(det(S)) + 2*log(nn); CC.logSF5(k) = log(det(S)); CC.logSF6(k) = log(det(S)) - 2*log(nn/sum(ECM(:,1,1))); CC.logSF7(k) = log(det(S)) + d*log(2*pi) - 2*log(nn/sum(ECM(:,1,1))); CC.logSF8(k) = sum(log(svd(S))) + log(nn) - log(sum(ECM(:,1,1))); CC.SF(k) = nn/sqrt((2*pi)^d * det(S)); %CC.datatype='LLBC'; end end end end

function [CL101,Labels] = cl101(classlabel)
%% CL101 converts classlabels to {-1,1} encoding
%
% Input:
%   classlabel  either a single column of non-negative integer class
%               indices, or a matrix whose elements are all -1, 0 or +1.
% Output:
%   CL101   for a column of class indices with M = max(classlabel):
%             M==2: a single column, +1 for class 2, -1 for class 1,
%                   0 for any other value
%             else: one column per class 1..M (one-versus-rest scheme),
%                   +1 for members of the class and -1 for all others
%           a {-1,0,+1} matrix is returned unchanged.
%   Labels  1:M, where M is max(classlabel) for class indices, or the
%           number of columns of a {-1,0,+1} matrix.
%
% An error is raised for any other label format.

	% The column-count test must come first: "&&" requires scalar operands,
	% and all(...) of a multi-column matrix is a row vector.
	if (size(classlabel,2)==1) && all(classlabel>=0) && all(classlabel==fix(classlabel))
		M = max(classlabel);
		if M==2,
			CL101 = (classlabel==2)-(classlabel==1);
		else
			CL101 = zeros(size(classlabel,1),M);
			for k=1:M,
				%% One-versus-Rest scheme
				CL101(:,k) = 2*real(classlabel==k) - 1;
			end
		end
		% NOTE(review): a NaN label makes all(classlabel>=0) false, so this
		% branch is never entered with NaN labels and the next line is a no-op.
		CL101(isnan(classlabel),:) = NaN; %% or zero ???

	elseif all((classlabel==1) | (classlabel==-1) | (classlabel==0) )
		CL101 = classlabel;
		M = size(CL101,2);
	else
		error('format of classlabel unsupported');
	end
	Labels = 1:M;
	return;
end

function [cl1m, Labels] = CL1M(classlabel)
%% CL1M converts classlabels to 1..M encoding
%
% Input:
%   classlabel  either a single column of non-negative integer class
%               indices (returned unchanged), or a matrix whose elements
%               are all -1, 0 or +1.
% Output:
%   cl1m    column of class indices. For a single {-1,0,+1} column:
%           -1 -> 1, +1 -> 2, 0 -> 0. For several columns: the index of
%           the row maximum, or 0 if that maximum is smaller than 1.
%   Labels  1:max(cl1m)
%
% Warnings are issued if a row contains more than one +1, or if a row of
% a multi-column matrix has no +1. Any other label format is an error.

	% see cl101: the scalar column-count test guards the "&&" chain
	if (size(classlabel,2)==1) && all(classlabel>=0) && all(classlabel==fix(classlabel))
		cl1m = classlabel;

	elseif all((classlabel==1) | (classlabel==-1) | (classlabel==0) )
		M = size(classlabel,2);
		if any(sum(classlabel==1,2)>1)
			warning('invalid format of classlabel - at most one category may have +1');
		end
		if (M==1),
			cl1m = (classlabel==-1) + 2*(classlabel==+1);
		else
			[tmp, cl1m] = max(classlabel,[],2);
			if any(tmp ~= 1)
				warning('some class might not be properly represented - you might what to add another column to classlabel = [max(classlabel,[],2)<1,classlabel]');
			end
			cl1m(tmp<1)= 0; %% or NaN ???
		end
	else
		error('format of classlabel unsupported');
	end
	Labels = 1:max(cl1m);
	return;
end
NaN/inst/tpdf.m0000664002356700235670000000317511656313737014054 0ustar schloeglschloegl
function p = tpdf(x,n)
% TPDF returns student probability density
%
% pdf = tpdf(x,DF);
%
% Computes the PDF of the student distribution
%    with DF degrees of freedom
% x,DF must be matrices of same size, or any one can be a scalar.
%
% see also: TINV, TCDF, NORMPDF, NORMCDF, NORMINV

% Reference(s):

%	$Id: tpdf.m 9033 2011-11-08 20:58:07Z schloegl $
%	Copyright (C) 2000-2003,2008,2009,2010 by Alois Schloegl
%	This function is part of the NaN-toolbox
%	http://pub.ist.ac.at/~schloegl/matlab/NaN/

%    This program is free software; you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation; either version 2 of the License, or
%    (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program; If not, see http://www.gnu.org/licenses/ .

% allocate memory and check size of arguments
p = x+n;	  % if this line causes an error, size of input arguments do not fit.
ix = (n>0) & (n~=inf) & ~isnan(x);

% make size of x and n equal
n = x+n-x;
x = x+n-n;

% workaround for invalid arguments in BETA
if any(ix)
p(ix) = (exp (-(n(ix)+1).*log(1+x(ix).^2./n(ix))/2) ./ (sqrt(n(ix)).* beta(n(ix)/2, 1/2)));
end;
p(~ix)= NaN;

% shape output
p = reshape(p,size(x));

%!assert(tpdf(NaN,4),NaN)
NaN/inst/ranks.m0000664002356700235670000001535512240252101014211 0ustar schloeglschloegl
function r = ranks(X,DIM,Mode)
% RANKS gives the rank of each element in a vector.
% This program uses an advanced algorithm with average effort O(m.n.log(n))
% NaN in the input yields NaN in the output.
%
% r = ranks(X[,DIM])
%   if X is a vector, return the vector of ranks of X adjusted for ties.
%   if X is matrix, the rank is calculated along dimension DIM.
%   if DIM is zero or empty, the lowest dimension with more than 1 element is used.
% r = ranks(X,DIM,'traditional')
%   implements the traditional algorithm with O(n^2) computational
%   and O(n^2) memory effort
% r = ranks(X,DIM,'mtraditional')
%   implements the traditional algorithm with O(n^2) computational
%   and O(n) memory effort
% r = ranks(X,DIM,'advanced ')
%   implements an advanced algorithm with O(n*log(n)) computational
%   and O(n.log(n)) memory effort
% r = ranks(X,DIM,'advanced-ties')
%   implements an advanced algorithm with O(n*log(n)) computational
%   and O(n.log(n)) memory effort
%   but without correction for ties
%   This is the fastest algorithm
% r = ranks(X,DIM,'==')
%   runs the advanced and a traditional algorithm, compares the results,
%   and falls back to the traditional result if they disagree.
%
% An unknown Mode raises an error.
%
% see also: CORRCOEF, SPEARMAN, RANKCORR
%
% REFERENCES:
% --

%    $Id: ranks.m 12338 2013-11-11 22:03:45Z schloegl $
%    Copyright (C) 2000-2002,2005,2010,2013 by Alois Schloegl
%    This script is part of the NaN-toolbox
%    http://pub.ist.ac.at/~schloegl/matlab/NaN/

%    This program is free software; you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation; either version 3 of the License, or
%    (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program; If not, see http://www.gnu.org/licenses/ .

% Features:
% + is fast, uses an efficient algorithm for the rank correlation
% + computational effort is O(n.log(n)) instead of O(n^2)
% + memory effort is O(n.log(n)), instead of O(n^2).
%     Now, the ranks of 8000 elements can be easily calculated
% + NaNs in the input yield NaN in the output
% + compatible with Octave and Matlab
% + traditional method is also implemented for comparison.

if nargin<2, DIM = 0; end;
if ischar(DIM),
	Mode= DIM;
	DIM = 0;
elseif (nargin<3),
	Mode = '';
end;
if isempty(Mode),
	Mode='advanced ';
end;

sz_orig = size (X);
X = squeeze (X);	%remove singleton dimensions for convenience
nd = ndims (X);
if ~any(DIM(:))
	% DIM is zero or empty: use the lowest dimension with more than one
	% element, as documented, so that a row vector is ranked along its length.
	DIM = min(find(size(X)>1));
	if isempty(DIM), DIM = 1; end;
end
if DIM > 1
	%shift the array so that the dimension to sort over is first
	perm = [DIM 1:(DIM-1) (DIM+1):nd];
	X = permute (X, perm);
end
if nd > 2
	%convert X to 2-D if it has >2 dimensions
	sz = size(X);
	N = sz(1);
	M = prod(sz(2:end));
	X = reshape(X, N, M);
else
	[N,M] = size(X);
end

if strcmp(Mode(1:min(11,length(Mode))),'traditional'), % traditional, needs O(m.n^2)
	% this method was originally implemented by: KH
	% Comment of KH: This code is rather ugly, but is there an easy way to get the ranks adjusted for ties from sort?

	r = zeros(size(X));
	for i = 1:M;
		p = X(:, i(ones(1,N)));
		r(:,i) = (sum (p < p') + (sum (p == p') + 1) / 2)';
	end;
	% r(r<1)=NaN;

elseif strcmp(Mode(1:min(12,length(Mode))),'mtraditional'),
	% + memory effort is lower

	r = zeros(size(X));
	for k = 1:N;
	for i = 1:M;
		r(k,i) = (sum (X(:,i) < X(k,i)) + (sum (X(:,i) == X(k,i)) + 1) / 2);
	end;
	end;
	% r(r<1)=NaN;

elseif strcmp(Mode(1:min(13,length(Mode))),'advanced-ties'), % advanced
	% + uses sorting, hence needs only O(m.n.log(n)) computations
	% - does not fix ties

	r = zeros(size(X));
	[sX, ix] = sort(X,1);
	for k=1:M,
		[tmp,r(:,k)] = sort(ix(:,k),1);	    % r yields the rank of each element
	end;
	r(isnan(X)) = nan;

elseif strcmp(Mode(1:min(8,length(Mode))),'advanced'), % advanced
	% + uses sorting, hence needs only O(m.n.log(n)) computations

	% [tmp,ix] = sort([X,Y]);
	% [tmp,r] = sort(ix);     % r yields rank.
	% but because sort does not work accordingly for cell arrays,
	% and DIM argument not supported by Octave
	% and DIM argument does not work for cell-arrays in Matlab
	% we sort each column separately:

	r = zeros(size(X));
	for k = 1:M,
		[sX,ix] = sort(X(:,k));
		[tmp,r(:,k)] = sort(ix);	    % r yields the rank of each element

		% identify multiple occurrences (implemented to be compatible with traditional version)
		% x holds the end index of every run of equal values in sX. The last
		% run is always closed explicitly; comparing the last element with a
		% numeric sentinel would miss a trailing tie group whenever the
		% largest value happens to equal that sentinel.
		x = [0;find([sX(1:N-1)~=sX(2:N);true])];    % for this reason, cells are not implemented yet.
		d = find(diff(x)>1);

		% correct rank of multiple occurring elements
		for l = 1:length(d),
			t = (x(d(l))+1:x(d(l)+1))';
			r(ix(t),k) = mean(t);
		end;
	end;
	r(isnan(X)) = nan;

elseif strcmp(Mode,'=='),
	% the results of both algorithms are compared for testing.
	%
	% if the Mode-argument is omitted, both methods are applied and
	% the results are compared. Once the advanced algorithm is confirmed,
	% it will become the default Mode.

	% X is already arranged with the ranking dimension first, hence DIM=1
	r = ranks(X,1,'advanced ');
	r(isnan(r)) = 1/2;

	if N>100,
		r1 = ranks(X,1,'mtraditional');  % Memory effort is lower
	else
		r1 = ranks(X,1,'traditional');
	end;
	if ~all(all(r==r1)),
		fprintf(2,'WARNING RANKS: advanced algorithm does not agree with traditional one\n Please report to \n');
		r = r1;
	end;
	r(isnan(X)) = nan;

else
	error('RANKS: unknown Mode ''%s''', Mode);
end;

%reshape r to match the input X
if nd > 2
	r = reshape (r, sz);
end
if (DIM > 1)
	r = ipermute (r, perm);
end
r = reshape (r, sz_orig);   %restore any singleton dimensions

%!assert (ranks([2;4;4;4]), [1;3;3;3]);
%!assert (ranks([3 1 2]), [3 1 2]);
%!shared z, r1, r2
%! z = magic (4);
%! r1 = [4 1 1 4; 2 3 3 2; 3 2 2 3; 1 4 4 1];
%! r2 = [4 1 2 3; 1 4 3 2; 3 2 1 4; 2 3 4 1];
%!assert (ranks(z), r1);
%!assert (ranks(z, 2), r2);
%! z = nan(2, 2, 2);
%! z(:, :, 1) = [1 2; 3 4];
%! z(:, :, 2) = [4 3; 2 1];
%! r1 = cat(3, [1 1; 2 2], [2 2; 1 1]);
%!
r2 = cat(3, [1 2; 1 2], [2 1; 2 1]);
%!assert (ranks(z), r1);
%!assert (ranks(z, 2), r2);
%!assert (ranks(z, 3), r1);
NaN/inst/cov.m0000664002356700235670000000755611726461045013707 0ustar schloeglschloegl
function CC = cov(X,Y,Mode)
% COV covariance matrix
% X and Y can contain missing values encoded with NaN.
% NaN's are skipped, NaN do not result in a NaN output.
% The output gives NaN only if there are insufficient input data
% The mean is removed from the data.
%
% Remark: for data that contains missing values, the resulting
% matrix might not be positive definite, and its elements have magnitudes
% larger than one. This ill-behavior is more likely for small sample
% sizes, but there is no guarantee that the result "behaves well" for larger
% sample sizes. If you want a "well behaved" result (i.e. positive
% definiteness and magnitude of elements not larger than 1), use CORRCOEF.
% However, COV is faster than CORRCOEF and might be good enough in some cases.
%
% C = COV(X [,Mode]);
%      calculates the (auto-)covariance matrix of X
% C = COV(X,Y [,Mode]);
%      calculates the cross-covariance between X and Y.
%      C(i,j) is the covariance between the i-th and j-th
%      column of X and Y, respectively.
%   NOTE: Octave and Matlab have (in some special cases) incompatible implementations.
%      This implementation follows Octave. If the result could be ambiguous or
%      incompatible, a warning will be presented in Matlab. To avoid this warning use:
%      a) use COV([X(:),Y(:)]) if you want the traditional Matlab result.
%      b) use C = COV([X,Y]), C = C(1:size(X,2),size(X,2)+1:size(C,2)); if you want to be compatible with this software.
%
% Mode = 0 [default] scales C by (N-1)
% Mode = 1 scales C by N.
%
% A scalar second argument equal to 0 or 1 is interpreted as Mode, not as Y.
% Calling COV without any argument raises an error.
%
% see also: COVM, COR, CORRCOEF, SUMSKIPNAN
%
% REFERENCES:
% http://mathworld.wolfram.com/Covariance.html

%	$Id: cov.m 9803 2012-03-09 20:03:49Z schloegl $
%	Copyright (C) 2000-2003,2005,2009,2011,2012 by Alois Schloegl
%	This function is part of the NaN-toolbox
%	http://pub.ist.ac.at/~schloegl/matlab/NaN/

%    This program is free software; you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation; either version 2 of the License, or
%    (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program; If not, see http://www.gnu.org/licenses/ .

if nargin==1
	Mode = 0;
	Y = [];
elseif nargin==2,
	% if all(size(Y)==1) & any(Y==[0,1]); 	% This is not compatible with octave
	% short-circuit evaluation is required
	% but for compatibility to matlab, && is avoided
	SW = all(size(Y)==1);
	if SW, SW = any(Y==[0,1]); end;
	if SW,
		% scalar 0 or 1 in second position: it is the Mode, there is no Y
		Mode = Y;
		Y = [];
	else
		Mode = 0;
	end;
elseif nargin==3,
	% X, Y and Mode are all given explicitly
else
	% stop here: continuing would only fail later on the undefined inputs
	error('Error COV: invalid number of arguments');
end;

if ~exist('OCTAVE_VERSION','builtin') && ~isempty(Y) && (size(X,2)+size(Y,2)~=2),
	% COV in Matlab is differently defined than COV in Octave.
	% For compatibility reasons, this branch reflects the difference.
	fprintf(2,'Warning NaN/COV: This kind of use of COV is discouraged because it produces different results for Matlab and Octave. \n');
	fprintf(2,' (a) the traditional Matlab result can be obtained with: C = COV([X(:),Y(:)]).\n');
	fprintf(2,' (b) the traditional Octave result can be obtained with: C = COV([X,Y]); C = C(1:size(X,2),size(X,2)+1:size(C,2)).\n');

	if numel(Y)~=numel(X),
		error('The lengths of X and Y must match.');
	end;
	X = [X(:),Y(:)];
	Y = [];
end;

if isempty(Y)
	CC = covm(X,['D',int2str(Mode>0)]);
else
	CC = covm(X,Y,['D',int2str(Mode>0)]);
end;
NaN/inst/test_sc.m0000664002356700235670000003031011714752314014543 0ustar schloeglschloeglfunction [R]=test_sc(CC,D,mode,classlabel) % TEST_SC: apply statistical and SVM classifier to test data % % R = test_sc(CC,D,TYPE [,target_Classlabel]) % R.output output: "signed" distance for each class. % This represents the distances between sample D and the separating hyperplane % The "signed distance" is possitive if it matches the target class, and % and negative if it lays on the opposite side of the separating hyperplane. % R.classlabel class for output data % The target class is optional. If it is provided, the following values are returned. % R.kappa Cohen's kappa coefficient % R.ACC Classification accuracy % R.H Confusion matrix % % The classifier CC is typically obtained by TRAIN_SC. If a statistical % classifier is used, TYPE can be used to modify the classifier. % TYPE = 'MDA' mahalanobis distance based classifier % TYPE = 'MD2' mahalanobis distance based classifier % TYPE = 'MD3' mahalanobis distance based classifier % TYPE = 'GRB' Gaussian radial basis function % TYPE = 'QDA' quadratic discriminant analysis % TYPE = 'LD2' linear discriminant analysis % TYPE = 'LD3', 'LDA', 'FDA, 'FLDA' (Fisher's) linear discriminant analysis % TYPE = 'LD4' linear discriminant analysis % TYPE = 'GDBC' general distance based classifier % % see also: TRAIN_SC % % References: % [1] R. Duda, P. Hart, and D. Stork, Pattern Classification, second ed. % John Wiley & Sons, 2001.
% $Id: test_sc.m 9601 2012-02-09 14:14:36Z schloegl $ % Copyright (C) 2005,2006,2008,2009,2010 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or % modify it under the terms of the GNU General Public License % as published by the Free Software Foundation; either version 3 % of the License, or (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. if nargin<3, mode = []; end; [t1,t] = strtok(CC.datatype,':'); [t2,t] = strtok(t,':'); [t3] = strtok(t,':'); if ~strcmp(t1,'classifier'), return; end; if isfield(CC,'prewhite') D = D*CC.prewhite(2:end,:) + CC.prewhite(ones(size(D,1),1),:); CC = rmfield(CC,'prewhite'); end; POS1 = [strfind(CC.datatype,'/gsvd'),strfind(CC.datatype,'/sparse'),strfind(CC.datatype,'/delet')]; if 0, elseif strcmp(CC.datatype,'classifier:nbpw') error('NBPW not implemented yet') %%%% Naive Bayesian Parzen Window Classifier %%%% d = repmat(NaN,size(D,1),size(CC.MEAN,1)); for k = 1:size(CC.MEAN,1) z = (D - CC.MEAN(repmat(k,size(D,1),1),:)).^2 ./ (CC.VAR(repmat(k,size(D,1),1),:)); z = z + log(CC.VAR(repmat(k,size(D,1),1),:)); % + log(2*pi); d(:,k) = sum(-z/2, 2) + log(mean(CC.N(k,:))); end; d = exp(d-log(mean(sum(CC.N,1)))-log(2*pi)/2); elseif strcmp(CC.datatype,'classifier:nbc') %%%% Naive Bayesian Classifier %%%% d = repmat(NaN,size(D,1),size(CC.MEAN,1)); for k = 1:size(CC.MEAN,1) z = (D - CC.MEAN(repmat(k,size(D,1),1),:)).^2 ./ (CC.VAR(repmat(k,size(D,1),1),:)); z = z + 
log(CC.VAR(repmat(k,size(D,1),1),:)); % + log(2*pi); d(:,k) = sum(-z/2, 2) + log(mean(CC.N(k,:))); end; d = exp(d-log(mean(sum(CC.N,1)))-log(2*pi)/2); elseif strcmp(CC.datatype,'classifier:anbc') %%%% Augmented Naive Bayesian Classifier %%%% d = repmat(NaN,size(D,1),size(CC.MEAN,1)); for k = 1:size(CC.MEAN,1) z = (D*CC.V - CC.MEAN(repmat(k,size(D,1),1),:)).^2 ./ (CC.VAR(repmat(k,size(D,1),1),:)); z = z + log(CC.VAR(repmat(k,size(D,1),1),:)); % + log(2*pi); d(:,k) = sum(-z/2, 2) + log(mean(CC.N(k,:))); end; d = exp(d-log(mean(sum(CC.N,1)))-log(2*pi)/2); elseif strcmp(CC.datatype,'classifier:statistical:rda') % Friedman (1989) Regularized Discriminant analysis if isfield(CC,'hyperparameter') && isfield(CC.hyperparameter,'lambda') && isfield(CC.hyperparameter,'gamma') D = [ones(size(D,1),1),D]; % add 1-column lambda = CC.hyperparameter.lambda; gamma = CC.hyperparameter.gamma; d = repmat(NaN,size(D,1),size(CC.MD,1)); ECM = CC.MD./CC.NN; NC = size(ECM); ECM0 = squeeze(sum(ECM,3)); %decompose ECM [M0,sd,COV0] = decovm(ECM0); for k = 1:NC(3); [M,sd,s,xc,N] = decovm(squeeze(ECM(:,:,k))); s = ((1-lambda)*N*s+lambda*COV0)/((1-lambda)*N+lambda); s = (1-gamma)*s+gamma*(trace(s))/(NC(2)-1)*eye(NC(2)-1); ir = [-M;eye(NC(2)-1)]*inv(s)*[-M',eye(NC(2)-1)]; % inverse correlation matrix extended by mean d(:,k) = -sum((D*ir).*D,2); % calculate distance of each data point to each class end; else error('QDA: hyperparamters lambda and/or gamma not defined') end; elseif strcmp(CC.datatype,'classifier:csp') d = filtfilt(CC.FiltB,CC.FiltA,(D*CC.csp_w).^2); R = test_sc(CC.CSP,log(d)); % LDA classifier of d = R.output; elseif strcmp(CC.datatype,'classifier:svm:lib:1vs1') || strcmp(CC.datatype,'classifier:svm:lib:rbf'); nr = size(D,1); [cl] = svmpredict_mex(ones(nr,1), D, CC.model); %Use the classifier %Create a pseudo tsd matrix for bci4eval d = full(sparse(1:nr,cl,1,nr,CC.model.nr_class)); elseif isfield(CC,'weights'); %strcmpi(t2,'svm') || (strcmpi(t2,'statistical') & strncmpi(t3,'ld',2)) ; 
% linear classifiers like: LDA, SVM, LPM %d = [ones(size(D,1),1), D] * CC.weights; d = repmat(NaN,size(D,1),size(CC.weights,2)); for k = 1:size(CC.weights,2), d(:,k) = D * CC.weights(2:end,k) + CC.weights(1,k); end; elseif ~isempty(POS1) % GSVD, sparse & DELETION CC.datatype = CC.datatype(1:POS1(1)-1); r = test_sc(CC, D*sparse(CC.G)); d = r.output; elseif strcmp(t2,'statistical'); if isempty(mode) mode.TYPE = upper(t3); end; D = [ones(size(D,1),1),D]; % add 1-column W = repmat(NaN, size(D,2), size(CC.MD,3)); if 0, elseif strcmpi(mode.TYPE,'LD2'), %d = ldbc2(CC,D); ECM = CC.MD./CC.NN; NC = size(ECM); ECM0 = squeeze(sum(ECM,3)); %decompose ECM [M0] = decovm(ECM0); for k = 1:NC(3); ecm = squeeze(ECM(:,:,k)); [M1,sd,COV1] = decovm(ECM0-ecm); [M2,sd,COV2] = decovm(ecm); w = (COV1+COV2)\(M2'-M1')*2; w0 = -M0*w; W(:,k) = [w0; w]; end; d = D*W; elseif strcmpi(mode.TYPE,'LD3') || strcmpi(mode.TYPE,'FLDA'); %d = ldbc3(CC,D); ECM = CC.MD./CC.NN; NC = size(ECM); ECM0 = squeeze(sum(ECM,3)); %decompose ECM [M0,sd,COV0] = decovm(ECM0); for k = 1:NC(3); ecm = squeeze(ECM(:,:,k)); [M1] = decovm(ECM0-ecm); [M2] = decovm(ecm); w = COV0\(M2'-M1')*2; w0 = -M0*w; W(:,k) = [w0; w]; end; d = D*W; elseif strcmpi(mode.TYPE,'LD4'); %d = ldbc4(CC,D); ECM = CC.MD./CC.NN; NC = size(ECM); ECM0 = squeeze(sum(ECM,3)); %decompose ECM M0 = decovm(ECM0); for k = 1:NC(3); ecm = squeeze(ECM(:,:,k)); [M1,sd,COV1,xc,N1] = decovm(ECM0-ecm); [M2,sd,COV2,xc,N2] = decovm(ecm); w = (COV1*N1+COV2*N2)\((M2'-M1')*(N1+N2)); w0 = -M0*w; W(:,k) = [w0; w]; end; d = D*W; elseif strcmpi(mode.TYPE,'MDA'); d = repmat(NaN,size(D,1),length(CC.IR)); for k = 1:length(CC.IR); d(:,k) = -sum((D*CC.IR{k}).*D,2); % calculate distance of each data point to each class end; elseif strcmpi(mode.TYPE,'MD2'); d = repmat(NaN,size(D,1),length(CC.IR)); for k = 1:length(CC.IR); d(:,k) = sum((D*CC.IR{k}).*D,2); % calculate distance of each data point to each class end; d = -sqrt(d); elseif strcmpi(mode.TYPE,'GDBC'); d = 
repmat(NaN,size(D,1),length(CC.IR)); for k = 1:length(CC.IR); d(:,k) = sum((D*CC.IR{k}).*D,2) + CC.logSF7(k); % calculate distance of each data point to each class end; d = exp(-d/2); elseif strcmpi(mode.TYPE,'MD3'); d = repmat(NaN,size(D,1),length(CC.IR)); for k = 1:length(CC.IR); d(:,k) = sum((D*CC.IR{k}).*D,2) + CC.logSF7(k); % calculate distance of each data point to each class end; d = exp(-d/2); d = d./repmat(sum(d,2),1,size(d,2)); % Zuordungswahrscheinlichkeit [1], p.601, equ (18.39) elseif strcmpi(mode.TYPE,'QDA'); d = repmat(NaN,size(D,1),length(CC.IR)); for k = 1:length(CC.IR); % [1] (18.33) QCF - quadratic classification function d(:,k) = -(sum((D*CC.IR{k}).*D,2) - CC.logSF5(k)); end; elseif strcmpi(mode.TYPE,'QDA2'); d = repmat(NaN,size(D,1),length(CC.IR)); for k = 1:length(CC.IR); % [1] (18.33) QCF - quadratic classification function d(:,k) = -(sum((D*(CC.IR{k})).*D,2) + CC.logSF4(k)); end; elseif strcmpi(mode.TYPE,'GRB'); % Gaussian RBF d = repmat(NaN,size(D,1),length(CC.IR)); for k = 1:length(CC.IR); d(:,k) = sum((D*CC.IR{k}).*D,2); % calculate distance of each data point to each class end; d = exp(-sqrt(d)/2); elseif strcmpi(mode.TYPE,'GRB2'); % Gaussian RBF d = repmat(NaN,size(D,1),length(CC.IR)); for k = 1:length(CC.IR); d(:,k) = sum((D*CC.IR{k}).*D,2); % calculate distance of each data point to each class end; d = exp(-d); elseif strcmpi(mode.TYPE,'MQU'); % Multiquadratic d = repmat(NaN,size(D,1),length(CC.IR)); for k = 1:length(CC.IR); d(:,k) = sum((D*CC.IR{k}).*D,2); % calculate distance of each data point to each class end; d = -sqrt(1+d); elseif strcmpi(mode.TYPE,'IMQ'); % Inverse Multiquadratic d = repmat(NaN,size(D,1),length(CC.IR)); for k = 1:length(CC.IR); d(:,k) = sum((D*CC.IR{k}).*D,2); % calculate distance of each data point to each class end; d = (1+d).^(-1/2); elseif strcmpi(mode.TYPE,'Cauchy'); % Cauchy RBF d = repmat(NaN,size(D,1),length(CC.IR)); for k = 1:length(CC.IR); d(:,k) = sum((D*CC.IR{k}).*D,2); % calculate distance of each 
data point to each class end; d = 1./(1+d); else error('Classifier %s not supported. see HELP TRAIN_SC for supported classifiers.',mode.TYPE); end; else fprintf(2,'Error TEST_SC: unknown classifier\n'); return; end; if size(d,2)>1, [tmp,cl] = max(d,[],2); cl = CC.Labels(cl); cl(isnan(tmp)) = NaN; elseif size(d,2)==1, cl = (d<0) + 2*(d>0); cl(isnan(d)) = NaN; end; R.output = d; R.classlabel = cl; if nargin>3, [R.kappa,R.sd,R.H,z,R.ACC] = kappa(classlabel(:),cl(:)); end; NaN/inst/trimmean.m0000664002356700235670000000453011654472142014721 0ustar schloeglschloeglfunction Q=trimmean(Y,p,DIM) % TRIMMEAN calculates the trimmed mean by removing the fraction of p/2 upper and % p/2 lower samples. Missing values (encoded as NaN) are ignored and not taken into account. % The same number from the upper and lower values are removed, and is compatible to various % spreadsheet programs including GNumeric [1], LibreOffice, OpenOffice and MS Excel. % % Q = trimmean(Y,p) % Q = trimmean(Y,p,DIM) % returns the TRIMMEAN along dimension DIM of sample array Y. % If p is a vector, the TRIMMEAN for each p is computed. % % see also: MAD, RANGE, HISTO2, HISTO3, PERCENTILE, QUANTILE % % References: % [1] http://www.fifi.org/doc/gnumeric-doc/html/C/gnumeric-trimmean.html % $Id: trimmean.m 8953 2011-11-03 11:00:50Z schloegl $ % Copyright (C) 2009,2010,2011 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. 
%
% You should have received a copy of the GNU General Public License
% along with this program; If not, see .

% DIM defaults to the first non-singleton dimension of Y (1 if there is none)
if nargin<3,
    DIM = [];
end;
if isempty(DIM),
    DIM = find(size(Y)>1,1);
    if isempty(DIM), DIM = 1; end;
end;

if nargin<2,
    help trimmean

else
    % Y is addressed as a D1 x sz(DIM) x D3 array and the result Q as a
    % D1 x D2 x D3 array, where D2 = length(p); both use linear indexing.
    sz = size(Y);
    if DIM > length(sz),
        sz = [sz,ones(1,DIM-length(sz))];
    end;

    D1 = prod(sz(1:DIM-1));
    D2 = length(p);
    D3 = prod(sz(DIM+1:length(sz)));
    Q  = repmat(nan,[sz(1:DIM-1),D2,sz(DIM+1:length(sz))]);
    for k = 0:D1-1,
    for l = 0:D3-1,
        xi = k + l * D1*sz(DIM) + 1;    % first input element of this slice
        xo = k + l * D1*D2 + 1;         % first output element of this slice
        t  = Y(xi:D1:xi+D1*sz(DIM)-1);
        t  = sort(t(~isnan(t)));        % missing values are ignored
        N  = length(t);
        for m = 1:D2,
            n = floor(N*p(m)/2);        % samples removed at each end
            f = sum(t(1+n:N-n))/(N-2*n);
            % consecutive p-values are D1 elements apart in Q
            % (was Q(xo+m*D1) with a 0-based xo, which is off by D1-1)
            Q(xo + (m-1)*D1) = f;
        end;
    end;
    end;
end;

%!assert(trimmean([11.4, 17.3, 21.3, 25.9, 40.1],.2),23.2)
%!assert(trimmean([1,2,3;4,5,6],0,2),[2;5])
NaN/inst/rms.m0000664002356700235670000000320211553522126013675 0ustar schloeglschloeglfunction o=rms(x,DIM,W)
% RMS calculates the root mean square
% can deal with complex data.
%
% y = rms(x,DIM,W)
%
% DIM dimension
% 1 STD of columns
% 2 STD of rows
% N STD of N-th dimension
% default or []: first DIMENSION, with more than 1 element
% W weights to compute weighted s.d. (default: [])
% if W=[], all weights are 1.
% number of elements in W must match size(x,DIM)
%
% y estimated standard deviation
%
% features:
% - can deal with NaN's (missing values)
% - weighting of data
% - dimension argument also in Octave
% - compatible to Matlab and Octave
%
% see also: SUMSKIPNAN, MEAN

% This program is free software; you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation; either version 2 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; If not, see .

% $Id: rms.m 8223 2011-04-20 09:16:06Z schloegl $
% Copyright (C) 2000-2003,2008,2009 by Alois Schloegl
% This function is part of the NaN-toolbox
% http://pub.ist.ac.at/~schloegl/matlab/NaN/

% Omitted arguments are passed on as empty; SUMSKIPNAN treats an empty DIM
% or W exactly like a missing one.
if nargin<2, DIM = []; end;
if nargin<3, W = []; end;

% N counts the valid (non-NaN) elements, ssq is their sum of squares
[s,N,ssq] = sumskipnan(x,DIM,W);
o = sqrt(ssq./N);
NaN/inst/sumskipnan.m0000664002356700235670000001272312540557475015310 0ustar schloeglschloeglfunction [o,count,SSQ] = sumskipnan(x, DIM, W)
% SUMSKIPNAN adds all non-NaN values.
%
% All NaN's are skipped; NaN's are considered as missing values.
% SUMSKIPNAN of NaN's only gives O; and the number of valid elements is return.
% SUMSKIPNAN is also the elementary function for calculating
% various statistics (e.g. MEAN, STD, VAR, RMS, MEANSQ, SKEWNESS,
% KURTOSIS, MOMENT, STATISTIC etc.) from data with missing values.
% SUMSKIPNAN implements the DIMENSION-argument for data with missing values.
% Also the second output argument return the number of valid elements (not NaNs)
%
% Y = sumskipnan(x [,DIM])
% [Y,N,SSQ] = sumskipnan(x [,DIM])
% [...] = sumskipnan(x, DIM, W)
%
% x input data
% DIM dimension (default: [])
% empty DIM sets DIM to first non singleton dimension
% W weight vector for weighted sum, numel(W) must fit size(x,DIM)
% Y resulting sum
% N number of valid (not missing) elements
% SSQ sum of squares
%
% the function FLAG_NANS_OCCURED() returns whether any value in x
% is a not-a-number (NaN)
%
% features:
% - can deal with NaN's (missing values)
% - implements dimension argument.
% - computes weighted sum % - compatible with Matlab and Octave % % see also: FLAG_NANS_OCCURED, SUM, NANSUM, MEAN, STD, VAR, RMS, MEANSQ, % SSQ, MOMENT, SKEWNESS, KURTOSIS, SEM % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 3 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; If not, see . % $Id: sumskipnan.m 12826 2015-06-18 15:09:49Z schloegl $ % Copyright (C) 2000-2005,2009,2011 by Alois Schloegl % This function is part of the NaN-toolbox % http://pub.ist.ac.at/~schloegl/matlab/NaN/ global FLAG_NANS_OCCURED; if nargin<2, DIM = []; end; if nargin<3, W = []; end; % an efficient implementation in C of the following lines % could significantly increase performance % only one loop and only one check for isnan is needed % An MEX-Implementation is available in sumskipnan.cpp % % Outline of the algorithm: % for { k=1,o=0,count=0; k++; k1,1); if isempty(DIM), DIM = 1; end; end if (DIM<1), DIM = 1; end; %% Hack, because min([])=0 for FreeMat v3.5 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % non-float data %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% if (isempty(W) && (~(isa(x,'float') || isa(x,'double')))) || ~flag_implicit_skip_nan(), %%% skip always NaN's if ~isempty(W) error('SUMSKIPNAN: weighted sum of integers not supported, yet'); end; x = double(x); o = sum(x,DIM); if nargout>1 sz = size(x); N = sz(DIM); sz(DIM) = 1; count = repmat(N,sz); if nargout>2 x = x.*x; SSQ = sum(x,DIM); end; end; return; end; if ~isempty(W) && (size(x,DIM)~=numel(W)) error('SUMSKIPNAN: size of weight vector does 
not match size(x,DIM)'); end; %% mex and oct files expect double x = double(x); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % use Matlab-MEX function when available %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %if 1, try %% using sumskipnan_mex.mex if issparse(x), fprintf(2,'sumskipnan: sparse matrix converted to full matrix\n'); x = full(x); end; %% !!! hack: FLAG_NANS_OCCURED is an output argument, reserve memory !!! if isempty(FLAG_NANS_OCCURED), FLAG_NANS_OCCURED = logical(0); % default value end; if (nargout<2), o = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W); if (~isreal(x)) io = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W); o = o + i*io; end; return; elseif (nargout==2), [o,count] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W); if (~isreal(x)) [io,icount] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W); if any(count(:)-icount(:)) error('Number of NaNs differ for REAL and IMAG part'); else o = o+i*io; end; end; return; elseif (nargout>=3), [o,count,SSQ] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W); if (~isreal(x)) [io,icount,iSSQ] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W); if any(count(:)-icount(:)) error('Number of NaNs differ for REAL and IMAG part'); else o = o+i*io; SSQ = SSQ+iSSQ; end; end; return; end; end; if ~isempty(W) error('weighted sumskipnan requires sumskipnan_mex'); end; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % count non-NaN's %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% if nargout>1, count = sum(x==x,DIM); FLAG_NANS_OCCURED = any(count(:)2, x = real(x).^2 + imag(x).^2; SSQ = sum(x,DIM); end; %!assert(sumskipnan([1,2],1),[1,2]) %!assert(sumskipnan([1,NaN],2),1) %!assert(sumskipnan([1,NaN],2),1) %!assert(sumskipnan([nan,1,4,5]),10) %!assert(sumskipnan([nan,1,4,5]',1,[3;2;1;0]),6) NaN/inst/cdfplot.m0000664002356700235670000000355711601145313014535 0ustar schloeglschloeglfunction [h,stats] = cdfplot(X, varargin) % CDFPLOT plots empirical commulative distribution function % % cdfplot(X) % cdfplot(X, FMT) % cdfplot(X, PROPERTY, 
% VALUE,...)
% h = cdfplot(...)
% [h,stats] = cdfplot(X)
%
% X contains the data vector
% (matrix data is currently changed to a vector, this might change in future)
% FMT,PROPERTY,VALUE
% are used for formatting; see HELP PLOT for more details
% h graphics handle to the cdf curve
% stats
% a struct containing various summary statistics including
% mean, std, median, min, max.
%
% see also: ecdf, median, statistics, hist2res, plot
%
% References:

% $Id: cdfplot.m 8351 2011-06-24 17:35:07Z carandraug $
% Copyright (C) 2009,2010 by Alois Schloegl
% This function is part of the NaN-toolbox
% http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 3
% of the License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.

% histogram of the vectorised data; his.X and his.H are used below
% (presumably bin values and their counts - see HISTO_MEX)
his = histo_mex(X(:));
nbins = size(his.X,1);

% cumulated counts, normalised by the total count
cdf = cumsum(his.H,1) ./ sum(his.H,1);

% every his.X entry is used twice, so that the curve is drawn as a staircase
ix1 = ceil ([1:2*nbins]'/2);
ix2 = floor([2:2*nbins]'/2);
hh = plot (his.X(ix1), [0; cdf(ix2)], varargin{:});

% output arguments are assigned only on request
if nargout>0,
    h = hh;
end;
if nargout>1,
    stats = hist2res(his);
    stats.median = quantile(his,.5);
end;
NaN/inst/range.m0000664002356700235670000000342611553527111014177 0ustar schloeglschloeglfunction Q=range(Y,DIM)
% RANGE calculates the range of Y
% Missing values (encoded as NaN) are ignored.
%
% Q = range(Y)
% Q = range(Y,DIM)
% returns the range along dimension DIM of sample array Y.
%
% Q = range(HIS)
% returns the RANGE from the histogram HIS.
% HIS must be a HISTOGRAM struct as defined in HISTO2 or HISTO3.
%
% see also: IQR, MAD, HISTO2, HISTO3, PERCENTILE, QUANTILE

% $Id$
% Copyright (C) 2009,2010,2011 by Alois Schloegl
% This function is part of the NaN-toolbox
% http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation; either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; If not, see .

% Without any argument only the help text is shown. This must be checked
% before Y is touched, otherwise size(Y) below fails on the undefined Y.
if nargin<1,
    help range
    return;
end;

% DIM defaults to the first non-singleton dimension of Y (1 if there is none)
if nargin<2,
    DIM = [];
end;
if isempty(DIM),
    DIM = find(size(Y)>1,1);
    if isempty(DIM), DIM = 1; end;
end;

% a struct whose field datatype is 'HISTOGRAM' is handled as histogram
SW = isstruct(Y);
if SW, SW = isfield(Y,'datatype'); end;
if SW, SW = strcmp(Y.datatype,'HISTOGRAM'); end;

if SW,
    Q = repmat(NaN,1,size(Y.H,2));
    for k=1:size(Y.H,2);
        % values with a non-zero count; a single column of Y.X is
        % shared by all columns of Y.H
        t = Y.X(find(Y.H(:,k)>0),min(size(Y.X,2),k));
        if ~isempty(t),
            % a column without any counts keeps its NaN
            Q(1,k) = max(t)-min(t);
        end;
    end;

elseif isnumeric(Y) && nargin==1,
    Q = max(Y) - min(Y);

elseif isnumeric(Y) && nargin==2,
    Q = max(Y,[],DIM) - min(Y,[],DIM);

else
    help range
end;
NaN/inst/spearman.m0000664002356700235670000000340411553522126014706 0ustar schloeglschloeglfunction r = spearman(x,y)
% SPEARMAN Spearman's rank correlation coefficient.
% This function is replaced by CORRCOEF.
% Significance test and confidence intervals can be obtained from CORRCOEF.
%
% [R,p,ci1,ci2] = CORRCOEF(x, [y, ] 'Rank');
%
% For some (unknown) reason, in previous versions Spearman's rank correlation
% r = corrcoef(ranks(x)).
% But according to [1], Spearman's correlation is defined as
% r = 1-6*sum((ranks(x)-ranks(y)).^2)/(N*(N*N-1))
% The results are different. Here, the later version is implemented.
%
% see also: CORRCOEF, RANKCORR
%
% REFERENCES:
% [1] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html
% [2] http://mathworld.wolfram.com/CorrelationCoefficient.html

% $Id: spearman.m 8223 2011-04-20 09:16:06Z schloegl $
% Copyright (C) 2000-2002 by Alois Schloegl
% This function is part of the NaN-toolbox
% http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation; either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; If not, see .

% warning('SPEARMAN might become obsolete; use CORRCOEF(...,''Spearman'') instead');

% thin wrapper: the work is delegated to CORRCOEF in 'Spearman' mode
if nargin >= 2
    r = corrcoef(x,y,'Spearman');
else
    r = corrcoef(x,'Spearman');
end
NaN/inst/xcovf.m0000664002356700235670000000720311715164711014230 0ustar schloeglschloeglfunction [C,N,LAGS] = xcovf(X,Y,MAXLAG,SCALEOPT)
% XCOVF generates cross-covariance function.
% XCOVF is the same as XCORR except
% X and Y can contain missing values encoded with NaN.
% NaN's are skipped, NaN do not result in a NaN output.
% The output gives NaN only if there are insufficient input data
%
% [C,N,LAGS] = xcovf(X,MAXLAG,SCALEOPT);
% calculates the (auto-)correlation function of X
% [C,N,LAGS] = xcovf(X,Y,MAXLAG,SCALEOPT);
% calculates the crosscorrelation function between X and Y
%
% SCALEOPT [character string] specifies the type of scaling applied
% to the correlation vector (or matrix).
% is one of:
% 'none' return the unscaled correlation, R,
% 'biased' return the biased average, R/N,
% 'unbiased' return the unbiased average, R(k)/(N-|k|),
% 'coeff' return the correlation coefficient, R/(rms(x).rms(y)),
% where "k" is the lag, and "N" is the length of X.
% If omitted, the default value is "none".
% If Y is supplied but does not have the same length as X,
% scale must be "none".
%
%
% see also: COVM, XCORR

% $Id: xcovf.m 9608 2012-02-10 09:56:25Z schloegl $
% Copyright (C) 2005,2010,2011 by Alois Schloegl
% This function is part of the NaN-toolbox
% http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation; either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; If not, see <http://www.gnu.org/licenses/>.
% ---- sort out the optional arguments ------------------------------------
% Accepted call forms:
%   xcovf(X)                      xcovf(X,SCALEOPT)
%   xcovf(X,MAXLAG [,SCALEOPT])   xcovf(X,Y [,MAXLAG [,SCALEOPT]])
%   xcovf(X,Y,SCALEOPT)
% Every omitted argument gets a default, so that none is left undefined.
if nargin<2, Y = []; end;
if nargin<3, MAXLAG = []; end;
if nargin<4, SCALEOPT = 'none'; end;

if ischar(Y),
    % xcovf(X,SCALEOPT): the string is the scaling option, not a lag
    SCALEOPT = Y;
    MAXLAG = [];
    Y = [];
elseif all(size(Y)==1),
    % xcovf(X,MAXLAG [,SCALEOPT]): a scalar second argument is the maximum lag
    if nargin>2,
        SCALEOPT = MAXLAG;
    end;
    MAXLAG = Y;
    Y = [];
elseif ischar(MAXLAG),
    % xcovf(X,Y,SCALEOPT)
    SCALEOPT = MAXLAG;
    MAXLAG = [];
end;

% ---- correlation of the data and of the validity masks -------------------
% NaN's are replaced by zero, so that they do not contribute to the sums in C;
% the same correlation applied to the masks of valid samples is returned as N.
NX = isnan(X);
X(NX) = 0;
if isempty(Y),
    argC = {X};
    argN = {1-NX};
else
    NY = isnan(Y);
    Y(NY) = 0;
    argC = {X,Y};
    argN = {1-NX,1-NY};
end;
if ~isempty(MAXLAG),
    argC{end+1} = MAXLAG;
    argN{end+1} = MAXLAG;
end;
[C,LAGS] = xcorr(argC{:},'none');
[N,LAGS] = xcorr(argN{:},'none');

% ---- scaling --------------------------------------------------------------
if strcmp(SCALEOPT,'none')
    % done

elseif strcmp(SCALEOPT,'coeff')
    ix = find(LAGS==0);
    if ~any(size(X)==1),    %% ~isvector(X)
        c = C(ix,1:size(X,2)+1:end);    %% diagonal elements
        v = c.^-0.5;    % sqrt(1./c(:));
        v = v'*v;
        C = C.*repmat(v(:).',size(C,1),1);
    elseif isempty(Y)
        C = C/C(ix);
    else
        % sum of squares written with SUM, because SUMSQ is not available in Matlab
        C = C/sqrt(sum(abs(X(:)).^2)*sum(abs(Y(:)).^2));
    end;

elseif strcmp(SCALEOPT,'biased')
    C = C./repmat(max(N),size(C,1),1);

elseif strcmp(SCALEOPT,'unbiased')
    % R(k)/(N-|k|): the absolute lag is needed, otherwise negative lags
    % are divided by N+|k|
    C = C./(repmat(max(N),size(C,1),1)-repmat(abs(LAGS),1,size(C,2)));

else
    warning('invalid SCALEOPT - not supported');
end;
NaN/inst/mahal.m0000664002356700235670000000314612435614767014202 0ustar schloeglschloeglfunction [d] = mahal(X,Y)
% MAHAL return the Mahalanobis' D-square distance between the
% multivariate samples x and y, which must have the same number
% of components (columns), but may have a different number of observations (rows).
%
% d = mahal(X,Y)
%
% d(k) = (X(k,:)-MU)*inv(SIGMA)*(X(k,:)-MU)'
%
% where MU and SIGMA are the mean and the covariance matrix of Y
%
%
% see also: TRAIN_SC, TEST_SC, COVM
%
% References:

% Copyright (C) 2009,2014 by Alois Schloegl
% This function is part of the NaN-toolbox
% http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 2
% of the License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.

% X and Y must have the same number of columns
sx = size(X);
sy = size(Y);
if sx(2)~=sy(2),
    error('number of columns of X and Y do not fit');
end;

% compute mean of Y and remove it
[Y,m] = center(Y,1);

% compute inverse covariance matrix; the matrix returned by COVM is
% divided by MM-1 (MM is presumably the number of valid samples - see COVM)
[CC,MM] = covm(Y,'M');
IR = inv(CC./max(0,MM-1));

% subtract the mean of Y from each row of X
X = X-m(ones(size(X,1),1),:);

% quadratic form, one value per row of X; the terminating semicolon
% prevents the result from being echoed on every call
d = sum((X*IR).*X,2);
NaN/inst/nanmean.m0000664002356700235670000000254711553522126014524 0ustar schloeglschloeglfunction [o] = nanmean(i,DIM)
% NANMEAN same as SUM but ignores NaN's.
% NANMEAN is OBSOLETE; use MEAN instead. NANMEAN is included
% to provide backward compatibility
%
% Y = nanmean(x [,DIM])
%
% DIM dimension
% 1 sum of columns
% 2 sum of rows
% default or []: first DIMENSION with more than 1 element
% Y resulting mean
%
%
% see also: MEAN, SUMSKIPNAN, NANSUM

% $Id: nanmean.m 8223 2011-04-20 09:16:06Z schloegl $
% Copyright (C) 2009 by Alois Schloegl
% This is part of the NaN-toolbox.
% For more details see
% http://pub.ist.ac.at/~schloegl/matlab/NaN/
%
% This program is free software; you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation; either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; If not, see .

% An omitted DIM is passed on as empty; SUMSKIPNAN treats an empty DIM
% exactly like a missing one.
if nargin<2,
    DIM = [];
end;

% sum of the non-NaN elements divided by their number
[s,n] = sumskipnan(i,DIM);
o = s./n;
NaN/inst/quantile.m0000664002356700235670000001030011714752314014716 0ustar schloeglschloeglfunction Q=quantile(Y,q,DIM,method)
% QUANTILE calculates the quantiles of histograms and sample arrays.
%
% Q = quantile(Y,q)
% Q = quantile(Y,q,DIM)
% returns the q-th quantile along dimension DIM of sample array Y.
% size(Q) is equal size(Y) except for dimension DIM which is size(Q,DIM)=length(Q)
%
% Q = quantile(HIS,q)
% returns the q-th quantile from the histogram HIS.
% HIS must be a HISTOGRAM struct as defined in HISTO2 or HISTO3.
% If q is a vector, the each row of Q returns the q(i)-th quantile
%
% see also: HISTO2, HISTO3, PERCENTILE

% $Id: quantile.m 9601 2012-02-09 14:14:36Z schloegl $
% Copyright (C) 1996-2003,2005,2006,2007,2009,2011 by Alois Schloegl
% This function is part of the NaN-toolbox
% http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation; either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; If not, see .

% NOTE: the input argument "method" is accepted but not used below.

% DIM defaults to the first non-singleton dimension of Y (1 if there is none)
if nargin<3,
    DIM = [];
end;
if isempty(DIM),
    DIM = find(size(Y)>1,1);
    if isempty(DIM), DIM = 1; end;
end;

if nargin<2,
    help quantile

else
    % The quantiles are computed for ascending q; rix maps the sorted
    % results back to the order in which q was given.
    [q, rix] = sort(q(:)');     % sort quantile values
    [tmp,rix] = sort(rix);      % generate reverse index
    inner = find( (0 < q) & (q < 1) );  % q-values strictly between 0 and 1

    SW = isstruct(Y);
    if SW, SW = isfield(Y,'datatype'); end;
    if SW, SW = strcmp(Y.datatype,'HISTOGRAM'); end;

    if SW,
        [yr, yc] = size(Y.H);
        % q outside [0,1] keeps the NaN of the initialization
        Q = repmat(nan,length(q),yc);
        if ~isfield(Y,'N');
            Y.N = sum(Y.H,1);
        end;

        for k1 = 1:yc,
            tmp = Y.H(:,k1)>0;
            h = full(Y.H(tmp,k1));
            t = Y.X(tmp,min(size(Y.X,2),k1));
            if isempty(t),
                continue;       % no counts at all: column stays NaN
            end;
            N = Y.N(k1);
            L = length(t);

            % t2 and x are rebuilt for every column, so that nothing is
            % left over from a previous column with more distinct values:
            %   t2 = [t1 t1 t2 t2 ...], x = [0 c1 c1 c2 c2 ... cL]
            t2 = reshape([t(:).'; t(:).'],1,2*L);
            x2 = cumsum(h);
            x  = zeros(1,2*L);
            x(2:2:2*L) = x2;
            x(3:2:2*L) = x2(1:end-1);

            Q(q==0,k1) = t2(1);
            Q(q==1,k1) = t2(end);

            n = 1;
            for k2 = inner
                while (q(k2)*N > x(n)),
                    n = n+1;
                end;

                if q(k2)*N==x(n)
                    % mean of upper and lower bound
                    Q(k2,k1) = (t2(n)+t2(n+1))/2;
                else
                    Q(k2,k1) = t2(n);
                end;
            end;
        end;
        % order resulting quantiles according to original input q; this is
        % done once for all columns (inside the loop, earlier columns
        % would be permuted again with every further column)
        Q = Q(rix,:);

    elseif isnumeric(Y),
        % Y is addressed as a D1 x sz(DIM) x D3 array and Q as a
        % D1 x length(q) x D3 array, both with linear indexing
        sz = size(Y);
        if DIM>length(sz),
            sz = [sz,ones(1,DIM-length(sz))];
        end;

        D1 = prod(sz(1:DIM-1));
        D3 = prod(sz(DIM+1:length(sz)));
        Q  = repmat(nan,[sz(1:DIM-1),length(q),sz(DIM+1:length(sz))]);
        for k = 0:D1-1,
        for l = 0:D3-1,
            xi = k + l * D1*sz(DIM) + 1 ;
            xo = k + l * D1*length(q) + 1;
            t  = Y(xi:D1:xi+D1*sz(DIM)-1);
            t  = t(~isnan(t));
            N  = length(t);

            % f starts as NaN for every slice: q outside [0,1] (or NaN)
            % and slices without valid samples yield NaN, and nothing is
            % carried over from the previous slice
            f = repmat(nan,1,length(q));
            if (N>0)
                t  = sort(t);
                % rebuilt per slice; a t2 kept from a longer slice
                % would make t2(end) return a stale maximum
                t2 = reshape([t(:).'; t(:).'],1,2*N);
                x  = floor((1:2*N)/2);
                f(q==0) = t2(1);
                f(q==1) = t2(end);

                n = 1;
                for k2 = inner
                    while (q(k2)*N > x(n)),
                        n = n+1;
                    end;

                    if q(k2)*N==x(n)
                        % mean of upper and lower bound
                        f(k2) = (t2(n) + t2(n+1))/2;
                    else
                        f(k2) = t2(n);
                    end;
                end;
            end;
            Q(xo:D1:xo + D1*length(q) - 1) = f(rix);
        end;
        end;

    else
        fprintf(2,'Error QUANTILES: invalid input argument\n');
        return;
    end;
end;

%!assert(quantile(1:10,[.2,.5]),[2.5, 5.5])
%!assert(quantile([1,2,3,4,5;10,20,NaN,NaN,NaN],[0,1],2),[1,5;10,20])