strings/0000755000175000017500000000000011730706774013563 5ustar carandraugcarandraugstrings/inst/0000755000175000017500000000000011730652735014535 5ustar carandraugcarandraugstrings/inst/base64encode.m0000644000175000017500000000423711730472502017153 0ustar carandraugcarandraug
## Author: Paul Kienzle
## This program is granted to the public domain.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{Y} =} base64encode (@var{X})
## @deftypefnx {Function File} {@var{Y} =} base64encode (@var{X}, @var{do_reshape})
## Convert X into string of printable characters according to RFC 2045.
## The input may be a string or a matrix of integers in the range 0..255
## (of any numeric class); it is read in column-major order.
##
## By default the result is a character matrix with four rows, one encoded
## group of three input bytes per column.  If you want the output as a
## single row string instead, pass the @var{do_reshape} argument as true.
##
## An error is raised if a numeric @var{X} contains non-integer values or
## values outside 0..255.
##
## Example:
## @example
## @group
## base64encode('Hakuna Matata',true)
## ##returns 'SGFrdW5hIE1hdGF0YQ=='
##
## @end group
## @end example
## @seealso{base64decode}
## @end deftypefn

function Y = base64encode (X, do_reshape)

  if (nargin < 1)
    print_usage;
  elseif (nargin != 2)
    do_reshape = false;
  endif

  if (ischar (X))
    X = double (X);
  elseif (any (X(:) != fix (X(:))) || any (X(:) < 0) || any (X(:) > 255))
    ## every reduction is applied to the whole comparison, so each operand
    ## of || is a scalar
    error ("base64encode is expecting integers in the range 0 .. 255");
  endif

  ## work in double precision: division on integer classes rounds instead
  ## of truncating, which would corrupt the bit splitting below
  X = double (X(:));
  n = length (X);

  ## split the input into three pieces, zero padding to the same length
  in1 = X(1:3:n);
  in2 = zeros (size (in1));
  in3 = zeros (size (in1));
  in2(1:length (2:3:n)) = X(2:3:n);
  in3(1:length (3:3:n)) = X(3:3:n);

  ## put the top bits of the inputs into the bottom bits of the
  ## corresponding outputs
  out1 = fix (in1 / 4);
  out2 = fix (in2 / 16);
  out3 = fix (in3 / 64);

  ## add the bottom bits of the inputs as the top bits of the corresponding
  ## outputs
  out4 = in3 - 64 * out3;
  out3 = out3 + 4 * (in2 - 16 * out2);
  out2 = out2 + 16 * (in1 - 4 * out1);

  ## correct the output for padding: index 64 selects the '=' entry
  if (length (2:3:n) < length (1:3:n))
    out3(end) = 64;
  endif
  if (length (3:3:n) < length (1:3:n))
    out4(end) = 64;
  endif

  ## 6-bit encoding table, plus 1 for padding
  table = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";

  Y = table([out1'; out2'; out3'; out4'] + 1);

  if (do_reshape)
    Y = reshape (Y, [1, numel(Y)]);
  endif

endfunction

%!assert(base64encode('Hakuna Matata',true),'SGFrdW5hIE1hdGF0YQ==')
%!assert(base64encode([72 105],true),'SGk=')
%!assert(base64encode(uint8([72 105]),true),'SGk=')
%!error base64encode([1.5 2])
%!error base64encode([1 256])
strings/inst/strjoin.m0000644000175000017500000000401111730526414016371 0ustar carandraugcarandraug
## Copyright (C) 2007 Muthiah Annamalai
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.
## -*- texinfo -*-
## @deftypefn {Function File} {@var{rval} =} strjoin (@var{prefixstr}, @var{stringcell})
## @deftypefnx {Function File} {@var{rval} =} strjoin (@var{prefixstr}, @var{varargs})
## Joins the strings in @var{stringcell} with the @var{prefixstr} like the list-join
## function in Python; the second version allows usage with variable number of arguments.
## Note that, if using cell-array as a second argument, only 2 arguments are accepted.
## Also note that, both the arguments are strings or containers of strings (cells).
## An empty cell array yields the empty string.
##
## @example
## @group
## strjoin(' loves-> ','marie','amy','beth')
## ##returns 'marie loves-> amy loves-> beth'
##
## strjoin('*',@{'Octave','Scilab','Lush','Yorick'@})
## ##returns 'Octave*Scilab*Lush*Yorick'
## @end group
## @end example
## @seealso {strcmp}
## @end deftypefn

function rval = strjoin (spacer, varargin)

  if ((nargin < 2) || (nargin > 2 && iscell (varargin{1})))
    print_usage ();
  endif

  ## a single cell-array argument carries the list of strings itself
  if (iscell (varargin{1}))
    varargin = varargin{1};
  endif

  parts = varargin(:).';
  nparts = numel (parts);

  ## nothing to join: there is no last element to append
  if (nparts == 0)
    rval = "";
    return;
  endif

  ## interleave "string, spacer, string, ..., string" and concatenate once;
  ## plain concatenation keeps any whitespace in the strings and in the
  ## spacer exactly as given
  pieces = cell (1, 2*nparts - 1);
  pieces(1:2:end) = parts;
  pieces(2:2:end) = {spacer};
  rval = [pieces{:}];

endfunction

%!assert(strjoin("-","hello"),"hello")
%!assert(strjoin('*',{'Octave','Scilab','Lush','Yorick'}),'Octave*Scilab*Lush*Yorick')
%!assert(strjoin(' loves-> ','marie','amy','beth'),'marie loves-> amy loves-> beth')
%!assert(strjoin('-',{}),'')
strings/inst/editdistance.m0000644000175000017500000000457411730526323017356 0ustar carandraugcarandraug
## Copyright (C) 2006 Muthiah Annamalai
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {[@var{dist},@var{L}] =} editdistance (@var{string1}, @var{string2}, @var{weights})
## Compute the Levenshtein edit distance between the strings @var{string1} and
## @var{string2}. This operation is symmetrical.
##
## The optional argument @var{weights} is a vector of three costs: the
## first is charged for each insertion or deletion, the second for each
## matching character, and the third for each substitution of a
## mismatched character; by default it is set to
## +1, 0, +1 respectively, so that a smaller edit distance means a
## closer match between the two strings. This function implements
## the Levenshtein edit distance as presented in the Wikipedia article,
## accessed Nov 2006. Also the Levenshtein edit distance of a string
## with an empty string is defined to be its length.
##
## The default return value is @var{dist} the edit distance, and
## the other return value @var{L} is the distance matrix.
##
## @example
## @group
## editdistance('marry','marie')
## ##returns value +2 for the distance.
## @end group
## @end example
##
## @end deftypefn

function [dist, L] = editdistance (str1, str2, weights)

  if (nargin < 2 || (nargin == 3 && length (weights) < 3))
    print_usage ();
  endif

  ## one extra row and column stand for the empty prefix of each string
  nrows = length (str1) + 1;
  ncols = length (str2) + 1;
  L = zeros (nrows, ncols);

  if (nargin < 3)
    gap_cost   = 1;   # charged for an insertion or a deletion
    match_cost = 0;   # charged when the two characters agree
    subst_cost = 1;   # charged when the two characters differ
  else
    gap_cost   = weights(1);
    match_cost = weights(2);
    subst_cost = weights(3);
  endif

  ## turning a prefix into the empty string costs one gap per character
  L(:,1) = (0:nrows-1)' * gap_cost;
  L(1,:) = (0:ncols-1) * gap_cost;

  for r = 2:nrows
    for c = 2:ncols
      if (str1(r-1) == str2(c-1))
        step = match_cost;
      else
        step = subst_cost;
      endif
      ## cheapest of: diagonal move, move down, move right
      L(r,c) = min ([L(r-1,c-1) + step, L(r-1,c) + gap_cost, L(r,c-1) + gap_cost]);
    endfor
  endfor

  dist = L(nrows,ncols);

endfunction
strings/inst/base64decode.m0000644000175000017500000000763711730472463017156 0ustar carandraugcarandraug
## Copyright (C) 2007 Muthiah Annamalai
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{rval} =} base64decode (@var{code})
## @deftypefnx {Function File} {@var{rval} =} base64decode (@var{code}, @var{as_string})
## Convert a base64 @var{code} (a string of printable characters according to RFC 2045)
## into the original ASCII data set of range 0-255. If option @var{as_string} is
## passed, the return value is converted into a string.
##
## @example
## @group
## ##base64decode(base64encode('Hakuna Matata'),true)
## base64decode('SGFrdW5hIE1hdGF0YQ==',true)
## ##returns 'Hakuna Matata'
## @end group
## @end example
##
## See: http://www.ietf.org/rfc/rfc2045.txt
##
## @seealso {base64encode}
## @end deftypefn

function z = base64decode (X, as_string)

  if (nargin < 1)
    print_usage;
  elseif (nargin == 1)
    as_string = false;
  endif

  ## character codes as doubles, so that they can be used as table indices
  X = double (X);

  if (any (X(:) < 0) || any (X(:) > 255) || any (X(:) != fix (X(:))))
    error ("base64decode is expecting integers in the range 0 .. 255");
  endif

  ## nothing to decode
  if (isempty (X))
    z = zeros (1, 0);
    if (as_string)
      z = char (z);
    endif
    return;
  endif

  ## a single row is a flat code string: fold it into a 4xN matrix with one
  ## group of four characters per column
  if (rows (X) == 1)
    if (mod (columns (X), 4) != 0)
      error ("base64decode: length of the code string must be a multiple of 4");
    endif
    X = reshape (X, 4, []);
  endif

  ## Work backwards: look up the 6-bit value of every character.
  ## '=' is the padding mark and contributes zero bits; any character
  ## outside the alphabet maps to -1 and is rejected.
  alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  pad_char = double ("=");
  lut = -ones (1, 256);
  lut(double (alphabet) + 1) = 0:63;
  lut(pad_char + 1) = 0;
  ## reshape: indexing a row vector with a column vector returns a row
  V = reshape (lut(X + 1), size (X));
  if (any (V(:) < 0))
    error ("base64decode: input contains characters outside the base64 alphabet");
  endif

  ## rows missing from a matrix with fewer than four rows count as zero,
  ## rows beyond the fourth are ignored
  nrows = rows (V);
  ncols = columns (V);
  Q = zeros (4, ncols);
  k = min (nrows, 4);
  Q(1:k,:) = V(1:k,:);

  ## actual decoding work: 4 x 6 bits -> 3 x 8 bits
  bytes = [ Q(1,:)*4 + fix(Q(2,:)/16);
            mod(Q(2,:),16)*16 + fix(Q(3,:)/4);
            mod(Q(3,:),4)*64 + Q(4,:) ];

  ## an input with only two or three rows yields that many bytes per column
  bytes = bytes(1:min (3, nrows),:);
  z = bytes(:).';

  ## every '=' closing the final group stands for one byte that was never
  ## part of the data; drop exactly those bytes and nothing else, so that
  ## genuine trailing zero bytes survive
  npad = 0;
  if (nrows >= 4)
    npad = sum (X(3:4,end) == pad_char);
  endif
  z = z(1:end-npad);

  if (as_string)
    z = char (z);
  endif

endfunction

%!assert(base64decode(base64encode('Hakuna Matata'),true),'Hakuna Matata')
%!assert(base64decode(base64encode([1:255])),[1:255])
%!assert(base64decode(base64encode('taken'),true),'taken')
%!assert(base64decode(base64encode('sax'),true),'sax')
%!assert(base64decode(base64encode('H'),true),'H')
%!assert(base64decode(base64encode('Ta'),true),'Ta')
%!assert(base64decode('SGFrdW5hIE1hdGF0YQ==',true),'Hakuna Matata')
%!assert(base64decode('SA=='),72)
%!assert(base64decode('AAAA'),[0 0 0])
%!error base64decode('SGk')
%!error base64decode('S@==')
strings/inst/cstrcmp.m0000644000175000017500000000757611730526060016374 0ustar carandraugcarandraug
## Copyright (C) 2007 Muthiah Annamalai
## Copyright (C) 2012 Carnë Draug
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{rval} =} cstrcmp (@var{s1}, @var{s2})
## Compare strings @var{s1} and @var{s2} like the C function.
##
## Aside the difference to the return values, this function API is exactly the
## same as Octave's @code{strcmp} and will accept cell arrays as well.
##
## @var{rval} indicates the relationship between the strings:
## @itemize @bullet
## @item
## A value of 0 indicates that both strings are equal;
## @item
## A value of +1 indicates that the first character that does not match has a
## greater value in @var{s1} than in @var{s2}.
## @item
## A value of -1 indicates that the first character that does not match has a
## smaller value in @var{s1} than in @var{s2}.
## @end itemize
##
## @example
## @group
## cstrcmp("marry","marry")
## @result{} 0
## cstrcmp("marry","marri")
## @result{} +1
## cstrcmp("marri","marry")
## @result{} -1
## @end group
## @end example
##
## @seealso {strcmp, strcmpi}
## @end deftypefn

function rval = cstrcmp (s1, s2)

  if (nargin != 2)
    print_usage ();
  endif

  ## Start from Octave's strcmp with its result inverted: 0 where the
  ## strings are equal, 1 where they differ.  The ones still need a sign,
  ## and since that sign may be negative the result has to be double
  ## (strcmp returns logical).
  rval = double (! strcmp (s1, s2));
  if (! any (rval))
    ## all zeros, every pair is equal
    return
  endif

  ## positions whose sign is still to be decided
  idx = find (rval == 1);

  ## wrap plain strings so that both arguments can be handled as cells
  if (! iscell (s1))
    s1 = {s1};
  endif
  if (! iscell (s2))
    s2 = {s2};
  endif

  ## either both arrays have the same number of cells (possibly a single
  ## one each), or one of them has a single cell that is compared against
  ## every selected cell of the other
  n1 = numel (s1);
  n2 = numel (s2);
  if (n1 == n2)
    lhs = s1(idx);
    rhs = s2(idx);
  elseif (n1 > 1)
    lhs = s1(idx);
    rhs = s2(1);
  elseif (n2 > 1)
    lhs = s1(1);
    rhs = s2(idx);
  else
    return
  endif

  rval(idx) = cellfun (@get_sign, lhs, rhs);

endfunction

function r = get_sign (s1, s2)

  ## The strings may differ in length.  Only their common leading part is
  ## compared character by character; if that part is identical (or empty)
  ## the shorter string counts as the smaller one, as if it had been filled
  ## up with nulls.
  len1 = numel (s1);
  len2 = numel (s2);
  common = min (len1, len2);

  ## outcome when the common part does not decide
  r = sign (len1 - len2);
  if (common == 0)
    return
  endif

  delta = sign (s1(1:common) - s2(1:common));
  if (any (delta))
    ## the first differing character decides
    r = delta(find (delta != 0, 1));
  endif

endfunction

%!assert(cstrcmp("hello","hello"),0);
%!assert(cstrcmp("marry","marie"),+1);
%!assert(cstrcmp("Matlab","Octave"),-1);
%!assert(cstrcmp("Matlab",{"Octave","Scilab","Lush"}), [-1 -1 +1]);
%!assert(cstrcmp({"Octave","Scilab","Lush"},"Matlab"), [+1 +1 -1]);
strings/inst/strsort.m0000644000175000017500000000102111730652735016425 0ustar carandraugcarandraug
## Author: Paul Kienzle
## This program is granted to the public domain.
## -*- texinfo -*-
## @deftypefn {Function File} {[@dots{}] =} strsort (@dots{})
## Overloads the sort function to operate on strings.
##
## @seealso {sort}
## @end deftypefn

# PKG_ADD dispatch ("sort", "strsort", "string")

function [sorted, idx] = strsort (string, varargin)

  ## sort the character codes, then turn the result back into characters;
  ## any extra arguments are handed to sort unchanged
  codes = toascii (string);

  if (nargout != 2)
    ordered = sort (codes, varargin{:});
  else
    ## the permutation is only computed when the caller asks for it
    [ordered, idx] = sort (codes, varargin{:});
  endif

  sorted = char (ordered);

endfunction
strings/NEWS0000644000175000017500000000071411730525762014260 0ustar carandraugcarandraug
Summary of important user-visible changes for string 1.1.0:
-------------------------------------------------------------------

 ** The following functions have been removed since they are part of
    Octave core since 3.6.0:

      strtrim

 ** Package is no longer automatically loaded.

 ** The function `cstrcmp' has been completely rewritten. It should
    perform faster and will accept arguments exactly the same way as
    Octave's core `strcmp'.
strings/src/0000755000175000017500000000000011730703316014337 5ustar carandraugcarandraugstrings/src/Makefile0000644000175000017500000000017211730703316015777 0ustar carandraugcarandraug
MKOCTFILE = mkoctfile -Wall

all: pcregexp.oct

%.oct: %.cc
	$(MKOCTFILE) $<

clean:
	rm -f *.o octave-core core *.oct *~
strings/src/pcregexp.cc0000644000175000017500000001050511730653045016467 0ustar carandraugcarandraug
// Copyright (C) 2004 Stefan van der Walt
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//     1 Redistributions of source code must retain the above copyright notice,
//       this list of conditions and the following disclaimer.
//     2 Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in the
//       documentation and/or other materials provided with the distribution.
// // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS IS'' // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR // ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include #include //FIXME This function needs some documentation DEFUN_DLD(pcregexp, args, nargout, "\ -*- texinfo -*-\n\ @deftypefn {Loadable Function} {[@dots{}] =} pcregexp (@dots{})\n\ Perl-compatible regular expression matching.\n\ \n\ Check your system's @code{pcre} man page.\n\ \n\ @seealso{regexp}\n\ @end deftypefn\n\ ") { octave_value_list retval = octave_value_list(); if (args.length() != 2) { print_usage (); return retval; } std::string pattern = args(0).string_value(); std::string input = args(1).string_value(); if (error_state) { gripe_wrong_type_arg("pcregexp", args(0)); return retval; } // Compile expression pcre *re; const char *err; int erroffset; re = pcre_compile(pattern.c_str(), 0, &err, &erroffset, NULL); if (re == NULL) { error("pcregexp: %s at position %d of expression", err, erroffset); return retval; } // Get nr of subpatterns int subpatterns; int status = pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &subpatterns); // Match expression OCTAVE_LOCAL_BUFFER(int, ovector, (subpatterns+1)*3); int matches = pcre_exec(re, NULL, input.c_str(), input.length(), 0, 0, ovector, (subpatterns+1)*3); if (matches == PCRE_ERROR_NOMATCH) { for (int i=nargout-1; i>=0; i--) 
retval(i) = ""; retval(0) = Matrix(); pcre_free(re); return retval; } else if (matches < -1) { error("pcregexp: internal error calling pcre_exec"); return retval; } const char **listptr; status = pcre_get_substring_list(input.c_str(), ovector, matches, &listptr); if (status == PCRE_ERROR_NOMEMORY) { error("pcregexp: cannot allocate memory in pcre_get_substring_list"); pcre_free(re); return retval; } // Pack indeces Matrix indeces = Matrix(matches, 2); for (int i = 0; i < matches; i++) { indeces(i, 0) = ovector[2*i]+1; indeces(i, 1) = ovector[2*i+1]; if (indeces(i, 0) == 0) indeces(i, 1) = 0; } retval(0) = indeces; // Pack substrings retval.resize(nargout + 1); for (int i = 0; i < matches; i++) { retval(i+1) = *(listptr+i+1); } // Free memory pcre_free_substring_list(listptr); pcre_free(re); if (nargout > matches) { error("pcregexp: too many return values requested"); return octave_value_list(); } return retval; } /* %!assert(pcregexp("f(.*)uck"," firetruck "),[2,10;3,7]); %!test %! [m,b]=pcregexp("f(.*)uck"," firetruck "); %! assert(m,[2,10;3,7]); %! assert(b, "iretr") %!test %! [m,b] = pcregexp("a(.*?)d", "asd asd"); %! assert(m, [1,3;2,2]); %! assert(b, "s"); %!test %! [m,b] = pcregexp("a", "A"); %! assert(isempty(m)) %! assert(b, "") %!fail("[m,b] = pcregexp('a', 'a')", "pcregexp") %!fail("pcregexp('(', '')", "pcregexp") %! %!demo %! [m, s1] = pcregexp("(a.*?(d))", "asd asd") */ strings/DESCRIPTION0000644000175000017500000000056211730706774015274 0ustar carandraugcarandraugName: strings Version: 1.1.0 Date: 2012-03-16 Author: various authors Maintainer: Octave-Forge community Title: String Handling. 
Description: Additional manipulation functions
Depends: octave (>= 3.6.0)
Autoload: no
BuildRequires: pcre-devel [Debian] libpcre3-dev
License: GPLv3+, modified BSD, public domain
Url: http://octave.sf.net
strings/COPYING0000644000175000017500000000004211730706537014607 0ustar carandraugcarandraug
See individual files for licenses
strings/INDEX0000644000175000017500000000021311730710207014333 0ustar carandraugcarandraug
strings >> Strings
Search and replace
 pcregexp
Operations
 strsort editdistance cstrcmp strjoin
Conversion
 base64encode base64decode