xstr/ 40755 764 144 0 7512022712 10304 5ustar gerdusersxstr/xstr_search.ml100644 764 144 10446 6735526712 13324 0ustar gerdusers(* $Id: xstr_search.ml,v 1.1 1999/06/27 23:03:38 gerd Exp $ * ---------------------------------------------------------------------- * Search & Replace *) exception Replace_phrase of (int * string);; let index_of_substring_from s k_left substr = let l = String.length s in let lsub = String.length substr in let k_right = l - lsub in let c = if substr <> "" then substr.[0] else ' ' in let rec search k = if k <= k_right then begin if String.sub s k lsub = substr then k else let k_next = String.index_from s (k+1) c in search k_next end else raise Not_found in if substr = "" then k_left else search k_left ;; let rindex_of_substring_from s k_right substr = let l = String.length s in let lsub = String.length substr in let c = if substr <> "" then substr.[0] else ' ' in let rec search k = if k >= 0 then begin if String.sub s k lsub = substr then k else let k_next = String.rindex_from s (k-1) c in search k_next end else raise Not_found in if substr = "" then k_right else search k_right ;; let index_of_substring s substr = index_of_substring_from s 0 substr;; let rindex_of_substring s substr = rindex_of_substring_from s (String.length s - String.length substr) substr;; let contains_substring s substr = try let _ = index_of_substring s substr in true with Not_found -> false ;; let contains_substring_from s k_left substr = try let _ = index_of_substring_from s k_left substr in true with Not_found -> false ;; let rcontains_substring_from s k_right substr = try let _ = rindex_of_substring_from s k_right substr in true with Not_found -> false ;; let indexlist_of_substring s substr = let rec enumerate k = try let pos = index_of_substring_from s k substr in pos :: enumerate (pos+1) with Not_found -> [] in enumerate 0 ;; let rev_concat sep sl = (* = String.concat sep (List.rev sl), but more efficient *) let lsep = String.length sep in let rec get_len v sl = match sl with [] -> v | s :: sl' -> get_len (v + lsep + String.length s) sl' in let len = if sl = [] then 0 else get_len 0 sl - lsep in let t = String.create len in let rec fill_in k sl = match sl with [] -> () | [ s ] -> let s_len = String.length s in String.blit s 0 t (k-s_len) s_len | s :: sl' -> let s_len = String.length s in let k' = k - s_len in let k'' = k' - lsep in String.blit s 0 t k' s_len; String.blit sep 0 t k'' lsep; fill_in k'' sl' in fill_in len sl; t ;; let replace_char s rule = let l = String.length s in let rec replace coll k_last k = if k < l then begin let c = s.[k] in try let s' = rule c k in raise (Replace_phrase (1,s')) (* Alternatively, we could directly invoke 'replace' with some * parameters. But this would be a true recursion, without the * chance to be eliminated. * Would lead to Stack_overflow for large strings. *) with Match_failure(_,_,_) -> replace coll k_last (k+1) | Not_found -> replace coll k_last (k+1) | Replace_phrase (length, s') -> replace (s' :: String.sub s k_last (k-k_last) :: coll) (k+length) (k+length) end else String.sub s k_last (k-k_last) :: coll in rev_concat "" (replace [] 0 0) ;; let replace_substring s substrlist rule = let characters = (List.map (fun substr -> if substr = "" then failwith "replace_substring" else substr.[0]) substrlist) in let l = String.length s in let rec find k sl = match sl with [] -> raise Not_found | sub :: sl' -> let lsub = String.length sub in if k <= l - lsub & String.sub s k lsub = sub then let replacement = rule sub k in raise (Replace_phrase(lsub, replacement)) else raise Not_found in let rule' c k = if List.mem c characters then find k substrlist else raise Not_found in let rule'' c0 c k = if c = c0 then find k substrlist else raise Not_found in if List.length substrlist = 1 then replace_char s (rule'' (List.hd substrlist).[0]) else replace_char s rule' ;; (* ====================================================================== * History: * * $Log: xstr_search.ml,v $ * Revision 1.1 1999/06/27 23:03:38 gerd * Initial revision. * * *) xstr/README100644 764 144 6221 7512022673 11270 0ustar gerdusers---------------------------------------------------------------------- About xstr ---------------------------------------------------------------------- This package implements frequent string operations: searching, replacing, splitting, matching. It is independent from the Str library, and can replace Str in many cases. Unlike Str, xstr is thread-safe. xstr does not implement regular expressions in general, but an important subset. Some operations of xstr are performed as quickly as by Str; if the string to be processed is small, xstr is often faster than Str; if the string is big, xstr is upto half as fast than Str. ---------------------------------------------------------------------- BUILD: ---------------------------------------------------------------------- make all: Compiles the bytecode archive. make opt: Compiles the native archive. ---------------------------------------------------------------------- INSTALL: ---------------------------------------------------------------------- The module can be installed using the installation method supported by the "findlib" module. If you have installed "findlib", a "make install" copies the compiled files to their standard places. The package name is then "xstr". ("make uninstall" removes the package again.) If you do not have "findlib", just copy the files manually where you like them (cp *.cmi *.cma *.cmxa *.a ). After installation, a "make clean" removes the intermediate files. ---------------------------------------------------------------------- SPEED: ---------------------------------------------------------------------- In the "speed" directory some problems are solved using xstr and Str. The results on a 486/133Mhz system: Using xstr Using Str Test Pattern matching 1: 0.002400 seconds 0.001000 seconds Test Pattern matching 2: 0.000073 seconds 0.000229 seconds Test Pattern matching 3: 0.007460 seconds 0.004060 seconds Test Pattern replacing 1: 0.008500 seconds 0.004000 seconds Test Pattern replacing 2: 0.051300 seconds 0.042400 seconds Test Splitting 1: 0.020400 seconds 0.016100 seconds Test Splitting 2: 0.000220 seconds 0.000200 seconds Test Substring searching: 0.000890 seconds 0.000810 seconds Test Unquoting 1: 0.030200 seconds 0.031200 seconds Test Unquoting 2: 0.000195 seconds 0.000353 seconds ---------------------------------------------------------------------- CHANGES: ---------------------------------------------------------------------- Changed in version 0.2.1: Fix in mkset Changed in version 0.2: Bugfix in replace_matched_substrings. Changed in version 0.1.2: Updated URLs in documentation. ---------------------------------------------------------------------- AUTHOR: ---------------------------------------------------------------------- The module has been written by Gerd Stolpmann, gerd@gerd-stolpmann.de You can download it from http://www.ocaml-programming.de/packages/. This module has an entry in the O'Caml link database, http://www.npc.de/ocaml/linkdb/ ---------------------------------------------------------------------- LICENSE: ---------------------------------------------------------------------- See the LICENSE file. xstr/xstr_split.ml100644 764 144 13365 6740725644 13216 0ustar gerdusers(* $Id: xstr_split.ml,v 1.2 1999/07/06 21:32:09 gerd Exp $ * ---------------------------------------------------------------------- * *) type cclass = CData | CIgnore | CSeparator | CIgnoreOrSeparator ;; let split_string ignoreset ignoreleft ignoreright separators (* s *) = (* 'ignoreset': Characters that are ignored before and after the * separator * 'ignoreleft': true means to ignore characters from 'ignoreset' at * the beginning of the string * 'ignoreright': true means to ignore characters from 'ignoreset' at * the end of the string * 'separators': list of separating strings * 's': string to split * * EXAMPLES: * * - split_string " " true true [ "," ] "a, b, c ,d " * = [ "a"; "b"; "c"; "d" ] * - split_string "" true true [ "," ] "a, b, c ,d " * = [ "a"; " b"; " c "; "d " ] * - split_string " " false false [ "," ] "a, b, c ,d " * = [ "a"; "b"; "c"; "d " ] *) let character_classification = let a = Array.create 256 CData in let sepchars = List.flatten (List.map (fun sep -> if sep <> "" then [sep.[0]] else []) separators) in let ignorechars = let l = ref [] in for k = 0 to String.length ignoreset - 1 do l := ignoreset.[k] :: !l done; !l in List.iter (fun c -> a.( Char.code c ) <- CSeparator) sepchars; List.iter (fun c -> let code = Char.code c in if a.( code ) = CSeparator then a.( code ) <- CIgnoreOrSeparator else a.( code ) <- CIgnore) ignorechars; a in fun s -> let l = String.length s in let rec split_over_word i_wordbeg i_wordend i_current = (* i_wordbeg <= i_wordend: i_current has not yet reached the next * separator. i_wordbeg is the position of * the first CData character, i_wordend the * position after the last CData Character of * the word. *) if i_current < l then begin let code = Char.code (s.[i_current]) in let cl = character_classification.(code) in match cl with CData -> (* split i_wordbeg (i_current+1) (i_current+1) *) fast_skip_word i_wordbeg (i_current+1) | CIgnore -> split_over_word i_wordbeg i_wordend (i_current+1) | _ -> let rec find_sep sepl = match sepl with [] -> if cl = CSeparator then (* just as CData *) (* split i_wordbeg (i_current+1) (i_current+1) *) fast_skip_word i_wordbeg (i_current+1) else (* just as CIgnore *) split_over_word i_wordbeg i_wordend (i_current+1) | sep :: sepl' -> let lsep = String.length sep in if i_current + lsep <= l & String.sub s i_current lsep = sep then (* found separator *) String.sub s i_wordbeg (i_wordend - i_wordbeg) :: split_after_word (i_current + lsep) (i_current + lsep) else find_sep sepl' in find_sep separators end else (* i_current >= l *) if ignoreright then [ String.sub s i_wordbeg (i_wordend - i_wordbeg) ] else [ String.sub s i_wordbeg (i_current - i_wordbeg) ] and split_after_word i_wordbeg i_current = (* i_wordbeg > i_wordend: i_current is just after the separator and * searches the next word beginning *) if i_current < l then begin let code = Char.code (s.[i_current]) in let cl = character_classification.(code) in match cl with CData -> (* split i_current (i_current+1) (i_current+1) *) fast_skip_word i_current (i_current+1) | (CIgnore|CIgnoreOrSeparator) -> split_after_word i_wordbeg (i_current+1) | CSeparator -> let rec find_sep sepl = match sepl with [] -> (* split i_wordbeg (i_current+1) (i_current+1) *) fast_skip_word i_wordbeg (i_current+1) | sep :: sepl' -> let lsep = String.length sep in if i_current + lsep < l & String.sub s i_current lsep = sep then (* found separator *) "" :: split_after_word (i_current + lsep) (i_current + lsep) else find_sep sepl' in find_sep separators end else (* i_current >= l *) if i_wordbeg = 0 then [] (* not any word found *) else [ "" ] (* Now some frequent special cases *) and fast_skip_word i_wordbeg i_current = (* i_wordbeg <= i_current = i_wordend *) if i_current < l-1 then begin let code1 = Char.code (s.[i_current]) in let cl1 = character_classification.(code1) in match cl1 with CData -> begin let code2 = Char.code (s.[i_current+1]) in let cl2 = character_classification.(code2) in match cl2 with CData -> fast_skip_word i_wordbeg (i_current+2) | CIgnore -> split_over_word i_wordbeg (i_current+1) (i_current+2) | _ -> (* continue with the general routine *) split_over_word i_wordbeg (i_current+1) (i_current+1) end | CIgnore -> split_over_word i_wordbeg i_current (i_current+1) | _ -> (* continue with the general routine *) split_over_word i_wordbeg i_current i_current end else split_over_word i_wordbeg i_current i_current in if ignoreleft then split_after_word 0 0 else split_over_word 0 0 0 ;; (* ====================================================================== * History: * * $Log: xstr_split.ml,v $ * Revision 1.2 1999/07/06 21:32:09 gerd * Tried to optimize the function; but currently without success. * There should be deeper analysis -- on the other hand, splitting seems * to be relative fast compared with the Str splitting function. * Perhaps the improvements have an effect on machines with bigger caches. * * Revision 1.1 1999/06/27 23:03:38 gerd * Initial revision. * * *) xstr/xstr_search.mli100644 764 144 10760 6735526712 13474 0ustar gerdusers(* $Id: xstr_search.mli,v 1.1 1999/06/27 23:03:38 gerd Exp $ * ---------------------------------------------------------------------- * Search & Replace *) exception Replace_phrase of (int * string);; (* see 'replace_char' and 'replace_string' *) val index_of_substring_from : string -> int -> string -> int (* index_of_substring_from s k_left substr: * finds the leftmost index >= k_left where 'substr' occurs within s * or raises Not_found. *) val rindex_of_substring_from : string -> int -> string -> int (* eindex_of_substring_from s k_right substr: * finds the rightmost index <= k_right where 'substr' occurs within s * or raises Not_found. *) val index_of_substring : string -> string -> int (* index_of_substring s substr: * finds the leftmost index where 'substr' occurs within s * or raises Not_found. *) val rindex_of_substring : string -> string -> int (* eindex_of_substring s substr: * finds the rightmost index where 'substr' occurs within s * or raises Not_found. *) val contains_substring : string -> string -> bool (* contains_substring s substr: * true iff substr occurs in s *) val contains_substring_from : string -> int -> string -> bool (* contains_substring_from s k_left substr: * true iff substr occurs in s at index k_left or later *) val rcontains_substring_from : string -> int -> string -> bool (* rcontains_substring_from s k_right substr: * true iff substr occurs in s at index k_right or earlier *) val indexlist_of_substring : string -> string -> int list (* indexlist_of_substring s substr: * Returns a list of all indexes of substrings substr in s *) val rev_concat : string -> string list -> string (* rev_concat s l = String.concat s (List.rev l) *) val replace_char : string -> (char -> int -> string) -> string (* replace_char s rule: * replaces characters in s according to rule. * rule c k = s' means: replace character c where c = s.[k] by s' * The rule may raise Match_failure or Not_found in which case * the character is not replaced. * It may raise Replace_phrase (l,s') which means that the l * characters at k should be replaced by s'. * * EXAMPLE: * * - replace '<', '>', '&' in an HTML document by CDATA entities: * replace_char s (fun c k -> * match c with * '<' -> "<" * | '>' -> ">" * | '&' -> "&" * ) * - replace backslashes by double-backslashes and precede quotes with * backslashes: * replace_char s (fun c k -> * match c with * '\\' -> "\\\\" * '"' -> "\\\"") * - the reverse function (remove backslashes): * replace_char s (fun c k -> * match c with * '\\' -> begin * if k+1 < String.length s then * raise * (Replace_phrase * (2, String.make 1 (s.[k+1]))) * else * raise Not_found * end) *) val replace_substring : string -> string list -> (string -> int -> string) -> string (* replace_substring s substrlist rule: * replaces all occurences of substrings in 's' which are enumerated * in 'substrlist' by applying 'rule'. * rule t k = t': means that substring t at position k is replaced by t' * The rule may raise Match_failure or Not_found in which case * the character is not replaced. * It may raise Replace_phrase (l,s') which means that the l * characters at k should be replaced by s'. * * EXAMPLE: * * - Interpret CDATA entities of HTML: * replace_substring s * [ "<"; ">"; "&" ] * (fun s k -> match s with * "<" -> "<" * | ">" -> ">" * | "&" -> "&") *) (* ====================================================================== * History: * * $Log: xstr_search.mli,v $ * Revision 1.1 1999/06/27 23:03:38 gerd * Initial revision. * * *) xstr/RELEASE100644 764 144 6 7512022712 11320 0ustar gerdusers0.2.1 xstr/xstr_split.mli100644 764 144 4345 6737736714 13353 0ustar gerdusers(* $Id: xstr_split.mli,v 1.2 1999/07/04 20:02:20 gerd Exp $ * ---------------------------------------------------------------------- * Split strings into words *) val split_string : string -> bool -> bool -> string list -> string -> string list (* split_string ignoreset ignoreleft ignoreright separators s: * * Splits 's' into words; the other parameters control the recognition * of words. * 'separators' is a list of strings that may separate the words. * 'ignoreset' is a list of characters (written as string) that are * ignored before and after separators. * 'ignoreleft' controls whether characters can be ignored before the * first word, too. * 'ignoreright' controls whether characters can be ignored after the * last word, too. * * Empty separators may have strange effects. Do not use them. * * 'ignoreleft = false' implies that at least one word will be found. * For example: * split_string " " false true [ "," ] " " = [ "" ] * 'ignoreright = false' does not have this effect: * split_string " " true false [ "," ] " " = [ ] * * Note that it is possible to ignore characters that are the beginning * of separators. E.g. * split_string " " true true [ " " ] " ab c d " = [ "ab"; "c"; "d" ] * * If many strings are splitted with the same rules, the following is * the recommended way: * let split_rule = split_string ign il ir seps in * ...call many times split_rule s for different values of s... * It is much more efficient than calling split_string directly. * * [Note thread-safety: such a 'split_rule' is allowed to be shared by * several threads.] * * EXAMPLES: * * - split_string " " true true [ "," ] "a, b, c ,d " * = [ "a"; "b"; "c"; "d" ] * - split_string "" true true [ "," ] "a, b, c ,d " * = [ "a"; " b"; " c "; "d " ] * - split_string " " false false [ "," ] "a, b, c ,d " * = [ "a"; "b"; "c"; "d " ] *) (* ====================================================================== * History: * * $Log: xstr_split.mli,v $ * Revision 1.2 1999/07/04 20:02:20 gerd * Improved the comments. * * Revision 1.1 1999/06/27 23:03:38 gerd * Initial revision. * * *) xstr/xstr_match.ml100664 764 144 35407 7512022624 13144 0ustar gerdusers(* $Id: xstr_match.ml,v 1.9 2002/07/07 11:27:16 gerd Exp $ * ---------------------------------------------------------------------- * String matching *) type variable = { mutable sref : string; mutable found : bool; mutable begun : bool; mutable from : int; mutable len : int } ;; type charset = int array;; type matcher = Literal of string | Anystring | Lazystring | Anychar | Anystring_from of charset | Lazystring_from of charset | Anychar_from of charset | Nullstring | Alternative of matcher list list | Optional of matcher list | Record of (variable * matcher list) | Scanner of (string -> int) ;; (**********************************************************************) (* operations on sets *) (* copied from the JavaCaml regexp implementation *) let the_full_set = Array.create 16 0xffff;; let the_empty_set = Array.create 16 0;; let dup_set s = Array.copy s ;; let empty_set () = the_empty_set ;; let full_set () = the_full_set ;; let ( +! ) a b = (* union *) let r = Array.create 16 0 in for i=0 to 15 do r.(i) <- a.(i) lor b.(i) done; r ;; let ( *! ) a b = (* intersection *) let r = Array.create 16 0 in for i=0 to 15 do r.(i) <- a.(i) land b.(i) done; r ;; let ( !! ) a = (* negation *) let r = Array.create 16 0 in for i=0 to 15 do r.(i) <- a.(i) lxor 0xffff done; r ;; let ( ?! ) a = (* not null? *) let n = ref 0 in for i=0 to 15 do n := !n lor a.(i) done; !n <> 0 ;; let set_include a n = (* include in set -- this is in-place modification! *) a.( n lsr 4 ) <- a.( n lsr 4 ) lor (1 lsl (n land 15)) ;; let set_exclude a n = (* exclude from set -- this is in-place modification! *) a.( n lsr 4 ) <- a.( n lsr 4 ) land ((1 lsl (n land 15)) lxor 0xffff) ;; let member_of_set n a = (* (a.( n lsr 4 ) land (1 lsl (n land 15))) <> 0 *) (a.( n lsr 4 ) lsr (n land 15)) land 1 <> 0 ;; let word_set() = let a = dup_set (empty_set()) in List.iter (fun c -> set_include a (Char.code c)) [ 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm'; 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M'; 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z'; '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; '_' ]; a ;; let noword_set() = let a = word_set() in !! a ;; let set_as_string set = let s = String.make 32 ' ' in for i = 0 to 15 do s.[i+i] <- Char.chr (set.(i) land 0xff); s.[i+i+1] <- Char.chr (set.(i) lsr 8); done; s ;; (**********************************************************************) exception Found of variable list ;; let rec first_character ml = (* return a set of characters s: all non-empty strings that ml matches have * initial characters that are element of s; if ml matches the empty string * then full_set() is returned. *) match ml with [] -> full_set() | Literal "" :: ml' -> first_character ml' | Literal s :: _ -> let cs = dup_set(empty_set()) in set_include cs (Char.code s.[0]); cs | Anystring :: _ -> full_set() | Lazystring :: _ -> full_set() | Anychar :: _ -> full_set() | Anystring_from s :: _ -> full_set() | Lazystring_from s :: _ -> full_set() | Anychar_from s :: _ -> s | Nullstring :: ml' -> first_character ml' | Alternative l :: _ -> List.fold_left (fun s x -> s +! (first_character x)) (empty_set()) l | Optional ml1 :: ml2 -> (first_character ml1) +! (first_character ml2) | Record (v,ml1) :: ml2 -> first_character ml1 | Scanner f :: _ -> full_set() ;; let match_string_at ml s k = let len = String.length s in let rec run k ml recs = (* returns () meaning that nothing has been found, or * Found recs'. * 'k': position in s * 'ml': matcher list to process * 'recs': recorded sections up to now * 'Some recs'': total list of recorded sections *) match ml with [] -> if k = len then raise(Found recs) | Literal x :: ml' -> let xlen = String.length x in begin match xlen with 0 -> run k ml' recs | 1 -> if k+1 <= len && s.[k] = x.[0] then run (k+1) ml' recs | 2 -> if k+2 <= len && s.[k] = x.[0] && s.[k+1] = x.[1] then run (k+2) ml' recs | 3 -> if k+3 <= len && s.[k] = x.[0] && s.[k+1] = x.[1] && s.[k+2] = x.[2] then run (k+3) ml' recs | 4 -> if k+4 <= len && s.[k] = x.[0] && s.[k+1] = x.[1] && s.[k+2] = x.[2] && s.[k+3] = x.[3] then run (k+4) ml' recs | _ -> if k + xlen <= len && String.sub s k xlen = x then run (k+xlen) ml' recs (* this is still not optimal *) end | Anystring :: ml' -> run len ml' recs; let ml'fc = first_character ml' in let rec find n = if n >= 0 then begin if member_of_set (Char.code s.[k+n]) ml'fc then run (k+n) ml' recs; find (n-1) end in find (len-k-1) | Lazystring :: ml' -> let ml'fc = first_character ml' in let max = len-k in let max3 = max - 3 in let rec find n = if n < max3 then begin let c1 = Char.code s.[k+n] in if member_of_set c1 ml'fc then run (k+n) ml' recs; let c2 = Char.code s.[k+n+1] in if member_of_set c2 ml'fc then run (k+n+1) ml' recs; let c3 = Char.code s.[k+n+2] in if member_of_set c3 ml'fc then run (k+n+2) ml' recs; let c4 = Char.code s.[k+n+3] in if member_of_set c4 ml'fc then run (k+n+3) ml' recs; find (n+4) end else if n <= max then begin run (k+n) ml' recs; find (n+1) end in find 0 | Anystring_from set :: ml' -> let rec region n = if k+n < len then let c = Char.code (s.[k+n]) in if member_of_set c set then region (n+1) else n else n in let max = region 0 in run (k+max) ml' recs; let ml'fc = first_character ml' in let rec find n = if n >= 3 then begin if member_of_set (Char.code s.[k+n]) ml'fc then run (k+n) ml' recs; if member_of_set (Char.code s.[k+n-1]) ml'fc then run (k+n-1) ml' recs; if member_of_set (Char.code s.[k+n-2]) ml'fc then run (k+n-2) ml' recs; if member_of_set (Char.code s.[k+n-3]) ml'fc then run (k+n-3) ml' recs; find (n-4) end else if n >= 0 then begin if member_of_set (Char.code s.[k+n]) ml'fc then run (k+n) ml' recs; find (n-1) end in find (max-1) | Lazystring_from set :: ml' -> let rec region n = if k+n < len then let c = Char.code (s.[k+n]) in if member_of_set c set then region (n+1) else n else n in let max = region 0 in let ml'fc = first_character ml' in let rec find n = if n < max then begin if member_of_set (Char.code s.[k+n]) ml'fc then run (k+n) ml' recs; find (n+1) end else if n = max then run (k+max) ml' recs in find 0 | Anychar :: ml' -> if k < len then run (k+1) ml' recs | Anychar_from set :: ml' -> if k < len then let c = Char.code (s.[k]) in if member_of_set c set then run (k+1) ml' recs | Nullstring :: ml' -> run k ml' recs | Alternative alts :: ml' -> let rec find alts = match alts with [] -> () | alt :: alts' -> run k (alt @ ml') recs; find alts' in find alts | Optional opt :: ml' -> run k (opt @ ml') recs; run k ml' recs | Record (r, recorded) :: ml' -> if r.found then failwith "string_match: the same variable matches twice"; if r.begun then begin (* ==> recorded = [] *) let old_len = r.len in r.found <- true; r.len <- k - r.from; run k ml' (r::recs); r.found <- false; r.len <- old_len end else begin let old_from = r.from in r.begun <- true; r.from <- k; run k (recorded @ (Record(r,[]) :: ml')) recs; r.begun <- false; r.from <- old_from end | Scanner f :: ml' -> let n = f (String.sub s k (len-k)) in if k+n > len then failwith "match"; run (k+n) ml' recs in try let recs = try run k ml []; raise Not_found with Found r -> r in List.iter (fun r -> if r.found then r.sref <- s) recs; true with Not_found -> false ;; let match_string ml s = let rec reset ml = match ml with [] -> () | Alternative alts :: ml' -> List.iter reset alts; reset ml' | Optional opt :: ml' -> reset opt; reset ml' | Record (v,r) :: ml' -> v.found <- false; v.begun <- false; reset r; reset ml' | _ :: ml' -> reset ml' in reset ml; match_string_at ml s 0 ;; let var s = { sref = s; found = false; begun = false; from = 0; len = String.length s } ;; type replacer = ReplaceLiteral of string | ReplaceVar of variable | ReplaceFunction of (unit -> string) ;; type rflag = Anchored | Limit of int ;; type repl = RLit of string | RRegion of (int * int) ;; exception Limit_exceeded;; let replace_matched_substrings ml rl fl s = let anchored = List.mem Anchored fl in let all = var "" in let ml' = [ Record(all, ml)] @ (if anchored then [] else [ Anystring ]) in let rec resetlist ml = match ml with [] -> [] | Alternative alts :: ml' -> List.flatten (List.map resetlist alts) @ resetlist ml' | Optional opt :: ml' -> resetlist opt @ resetlist ml' | Record (v,r) :: ml' -> v :: (resetlist r @ resetlist ml') | _ :: ml' -> resetlist ml' in let resl = resetlist ml' in let limit = List.fold_left (fun m f -> match f with Limit n -> if n < 0 then failwith "replace_matched_substrings"; if m >= 0 then min m n else n | _ -> m) (-1) fl in let n_repl = ref 0 in let replace_at k = if limit >= 0 && !n_repl >= limit then [], (-1) else begin List.iter (fun v -> v.found <- false; v.begun <- false) resl; if match_string_at ml' s k then begin (* interpret rl *) try let repltext = List.map (fun r -> match r with ReplaceLiteral s -> RLit s | ReplaceVar v -> if v.found then RRegion (v.from, v.len) else RLit "" | ReplaceFunction f -> begin try RLit (f ()) with Not_found -> raise Not_found | Match_failure (_,_,_) -> raise Not_found end) rl in let amount = all.len in incr n_repl; repltext, amount with Not_found -> [], (-1) end else [], (-1) end in let l = String.length s in let ml'fc = first_character ml' in let rec left_to_right trans k_gapstart k = let rec ltor k = if k < (l-1) then begin if not (member_of_set (Char.code s.[k]) ml'fc ) then begin if not (member_of_set (Char.code s.[k+1]) ml'fc ) then begin ltor (k+2) end else try_match trans k_gapstart (k+1) end else try_match trans k_gapstart k end else if k <= l then (* Note k<=l: this criterion could be much better *) try_match trans k_gapstart k else RRegion(k_gapstart, k-k_gapstart-1) :: trans in ltor k and try_match trans k_gapstart k = let repltext, amount = replace_at k in if amount >= 0 then begin left_to_right (repltext @ [RRegion(k_gapstart, k-k_gapstart)] @ trans) (k + amount) (if amount=0 then k+1 else k+amount) end else left_to_right trans k_gapstart (k+1) in let with_anchors () = try let repltext, amount = replace_at 0 in repltext with Not_found -> [ RRegion(0, l) ] | Limit_exceeded -> [ RRegion(0, l) ] in let rec total_length n trans = match trans with RLit s :: trans' -> total_length (n+String.length s) trans' | RRegion (_,len) :: trans' -> total_length (n+len) trans' | [] -> n in let rec form_replacement_ltor target trans j = match trans with RLit t :: trans' -> let ls = String.length t in let j' = j - ls in if ls > 0 then String.blit t 0 target j' ls; form_replacement_ltor target trans' j' | RRegion (from,len) :: trans' -> let j' = j - len in if len > 0 then String.blit s from target j' len; form_replacement_ltor target trans' j' | [] -> () in (* TODO: interpret rtol, * what's with initialization of variables? *) let transformer = if anchored then with_anchors() else left_to_right [] 0 0 in let length = total_length 0 transformer in let target = String.create length in form_replacement_ltor target transformer length; target, !n_repl ;; let var_matched v = v.found ;; let string_of_var v = String.sub v.sref v.from v.len ;; let found_string_of_var v = if v.found then String.sub v.sref v.from v.len else raise Not_found ;; let mkset s = let l = String.length s in let k = ref (-1) in let c = ref ' ' in let next_noesc() = incr k; if ( !k < l ) then begin c := s.[ !k ]; end in let set = dup_set (empty_set()) in let add_char c = let code = Char.code c in set_include set code in let add_range c1 c2 = let code1 = Char.code c1 in let code2 = Char.code c2 in for i = code1 to code2 do set_include set i done in let continue = ref true in next_noesc(); while !continue && !k < l do match () with | () when (!k + 2 < l) && (s.[!k + 1] = '-') -> (* range *) add_range !c (s.[!k + 2]); next_noesc(); next_noesc(); next_noesc(); | () -> add_char !c; next_noesc(); done; set ;; let mknegset s = !! (mkset s) ;; (* ====================================================================== * History: * * $Log: xstr_match.ml,v $ * Revision 1.9 2002/07/07 11:27:16 gerd * Fixed Xstr_match.mkset * * Revision 1.8 2000/09/23 13:43:22 gerd * Bugfix in replace_matched_substrings. * * Revision 1.7 1999/07/08 02:41:10 gerd * Bugfix in 'Record' matching: the variable was not reset to * its old values in the case the matching fails. * * Revision 1.6 1999/07/06 21:29:33 gerd * Optimizations in the 'replace_matched_substrings' function. * * Revision 1.5 1999/07/06 00:47:53 gerd * Added optimization 'first_character'. * Now 'run' raises an exception in the case of a success, and * otherwise returns () - the exact opposite as before. * Many more optimizations for 'match_string'. * * Revision 1.4 1999/07/05 22:34:57 gerd * match_string: simplifications; now much more tail recursions. * * Revision 1.3 1999/07/05 21:42:46 gerd * Bugfix: When Record(_,_) records in a loop, the state of the * variable was not cleared after every cycle. This is done now. * * Revision 1.2 1999/07/04 20:02:07 gerd * Added Lazystring, Lazystring_from. * Added replace_matched_substring function. * Changed the structure of 'variable'. 'sref' is either an arbitrary * string, or it is the input string of the matching function. 'from' and * 'len' are always used. * * Revision 1.1 1999/06/27 23:03:37 gerd * Initial revision. * * *) xstr/xstr_match.mli100644 764 144 17541 6737736700 13331 0ustar gerdusers(* $Id: xstr_match.mli,v 1.2 1999/07/04 20:02:08 gerd Exp $ * ---------------------------------------------------------------------- * Matching strings *) (* Copyright 1999 by Gerd Stolpmann *) type variable (* A 'variable' can record matched regions *) type charset (* sets of characters *) type matcher = Literal of string | Anystring | Lazystring | Anychar | Anystring_from of charset | Lazystring_from of charset | Anychar_from of charset | Nullstring | Alternative of matcher list list | Optional of matcher list | Record of (variable * matcher list) | Scanner of (string -> int) ;; (* Literal s: matches literally s and nothing else * Anystring/Lazystring matches a string of arbitrary length with arbitrary * contents * Anystring_from s/ * Lazystring_from s matches a string of arbitrary length with characters * from charset s * Anychar: matches an arbitrary character * Anychar_from s: matches a character from charset s * Nullstring: matches the empty string * Alternative * [ ml1; ml2; ... ] * first tries the sequence ml1, then ml2, and so on * until one of the sequences leads to a match of the * whole string * Optional ml: first tries the sequence ml, them the empty string. * = Alternative [ml; [Nullstring]] * Record (v, ml): matches the same as ml, but the region of the string * is recorded in v * Scanner f: f s is called where s is the rest to match. The function * should return the number of characters it can match, * or raise Not_found *) val match_string : matcher list -> string -> bool (* match_string ml s: * Tries to match 'ml' against the string 's'; returns true on success, and * false otherwise. * As side-effect, the variables in 'ml' are set. * Matching proceeds from left to right, and for some of the matchers there * are particular matching orders. The first match that is found using * this order is returned (i.e. the variables get their values from this * match). * Notes: * - Anystring and Anystring_from are "greedy"; they try to match as much * as possible. * - In contrast to this, Lazystring and Lazystring_from are "lazy"; they * try to match as few as possible. * - Alternatives are tested from left to right. * - Options are first tested with argument, then with the empty string * (i.e. "greedy") *) type replacer = ReplaceLiteral of string | ReplaceVar of variable | ReplaceFunction of (unit -> string) ;; type rflag = Anchored | Limit of int (* | RightToLeft *) ;; val replace_matched_substrings : matcher list -> replacer list -> rflag list -> string -> (string * int) (* replace_matched_substrings ml rl fl s: * * All substrings of 's' are matched against 'ml' in turn, and all * non-overlapping matchings are replaced according 'rl'. The standard * behaviour is to test from left to right, and to replace all occurences * of substrings. * This can be modified by 'fl': * - Anchored: Not the substrings of 's', but only 's' itself is * matched against 'ml'. * - Limit n: At most 'n' replacements will be done. * - RightToLeft: Begin with the rightmost matching; proceed with more * left matchings (NOT YET IMPLEMENTED!!!!) * The meaning of 'rl': Every matching is replaced by the sequence of * the elements of 'rl'. * - ReplaceLiteral t: Replace the string t * - ReplaceVar v: Replace the contents of 'v' or the empty string, * if v has no matching * - ReplaceFunction f: Replace f(). You may raise Not_found or * Match_failure to skip to the next matching. * 'replace_matched_substrings' returns the number of replacements. *) val var : string -> variable (* var s: creates new variable with initial value s. If this variable * is used in a subsequent matching, and a value is found, the value * is overwritten; otherwise the old value persists. * - Initial vales are stored as references to strings * - Matched values are stored as triples (s,from,len) where 's' is the * input string of the matching function * * [Note thread-safety: variables must not be shared by multiple threads.] *) val var_matched : variable -> bool (* returns true if the variable matched a value in the last match_string *) val string_of_var : variable -> string (* returns the current value of the variable *) val found_string_of_var : variable -> string (* returns the current value of the variable only if there was a match * for this variable in the last match_string; otherwise raise Not_found *) val mkset : string -> charset (* creates a set from readable description. The string simply enumerates * the characters of the set, and the notation "x-y" is possible, too. * To include '-' in the set, put it at the beginning or end. *) val mknegset : string -> charset (* creates the complement that mkset would create *) (* ---------------------------------------------------------------------- *) (* EXAMPLE: * * let v = var "" in * let _ = match_string [ Literal "("; Record (v, [Anystring]); Literal ")" ] * s * in found_string_of_var v * * - if s is "(abc)" returns "abc" * - if the parantheses are missing, raises Not_found * * VARIANT I: * * let v = var "" in * let _ = match_string [ Lazystring; * Literal "("; Record (v, [Lazystring]); Literal ")"; * Anystring ] * s * in found_string_of_var v * * - finds the first substring with parantheses, e.g. * s = "abc(def)ghi(jkl)mno" returns "def" * * To get the last substring, swap Lazystring and Anystring at the beginning * resp. end. * * VARIANT II: * * let v = var "" in * let _ = match_string [ Lazystring; * Literal "("; Record (v, [Anystring]); Literal ")"; * Anystring ] * s * in found_string_of_var v * * - for s = "abc(def)ghi(jkl)mno" it is returned "def)ghi(jkl" *) (* ---------------------------------------------------------------------- *) (* EXAMPLE: * * let v = var "" in * let digits = mkset "0-9" in * let digits_re = [ Record(v, [ Anychar_from digits; Anystring_from digits])] * in * replace_matched_substrings digits_re [ ReplaceLiteral "D" ] [] "ab012cd456fg" * * yields: ("abDcdDfg", 2) * * VARIANT I: * * replace_matched_substrings digits_re [ ReplaceLiteral "D" ] * [ Limit 1 ] "ab012cd456fg" * * yields: ("abDcd456fg", 1) * * VARIANT II: * * replace_matched_substrings digits_re [ ReplaceLiteral "D" ] * [ Anchored ] "ab012cd456fg" * * yields: ("ab012cd456fg", 0) * * VARIANT III: * * replace_matched_substrings digits_re [ ReplaceLiteral "D" ] * [ Anchored ] "012" * * yields: ("D", 1) * * VARIANT IV: * * let f() = string_of_int(1+int_of_string(string_of_var v)) in * replace_matched_substrings digits_re [ ReplaceFunction f ] * [] "ab012cd456fg" * * yields: ("ab13cd457fg", 2) *) (* ====================================================================== * History: * * $Log: xstr_match.mli,v $ * Revision 1.2 1999/07/04 20:02:08 gerd * Added Lazystring, Lazystring_from. * Added replace_matched_substring function. * Changed the structure of 'variable'. 'sref' is either an arbitrary * string, or it is the input string of the matching function. 'from' and * 'len' are always used. * * Revision 1.1 1999/06/27 23:03:38 gerd * Initial revision. * * *) xstr/Makefile100644 764 144 4302 7163125356 12052 0ustar gerdusers# make all: make bytecode archive # make opt: make native archive # make install: install bytecode archive, and if present, native archive # make uninstall: uninstall package # make clean: remove intermediate files # make distclean: remove any superflous files # make release: cleanup, create archive, tag CVS module # (for developers) #---------------------------------------------------------------------- # specific rules for this package: OBJECTS = xstr_split.cmo xstr_search.cmo xstr_match.cmo XOBJECTS = xstr_split.cmx xstr_search.cmx xstr_match.cmx ARCHIVE = xstr.cma XARCHIVE = xstr.cmxa NAME = xstr #REQUIRES = UNSAFE = # you may try this: (0% to 10% faster) #UNSAFE = -unsafe all: $(ARCHIVE) opt: $(XARCHIVE) $(ARCHIVE): $(OBJECTS) $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS) $(XARCHIVE): $(XOBJECTS) $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS) #---------------------------------------------------------------------- # general rules: OPTIONS = OCAMLC = ocamlc -g $(UNSAFE) $(OPTIONS) $(ROPTIONS) OCAMLOPT = ocamlopt $(UNSAFE) $(OPTIONS) $(ROPTIONS) OCAMLDEP = ocamldep $(OPTIONS) OCAMLFIND = ocamlfind depend: *.ml *.mli $(OCAMLDEP) *.ml *.mli >depend #depend.pkg: Makefile # $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg .PHONY: install install: all { test ! -f $(XARCHIVE) || extra="*.cmxa *.a"; }; \ $(OCAMLFIND) install $(NAME) *.mli *.cmi *.cma META $$extra .PHONY: uninstall uninstall: $(OCAMLFIND) remove $(NAME) .PHONY: clean clean: rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa $(MAKE) -C speed clean .PHONY: distclean distclean: clean rm -f *~ depend depend.pkg rm -f speed/*~ RELEASE: META awk '/version/ { print substr($$3,2,length($$3)-2) }' META >RELEASE .PHONY: dist dist: RELEASE r=`head -1 RELEASE`; cd ..; gtar czf $(NAME)-$$r.tar.gz --exclude='*/CVS*' --exclude="*/depend.pkg" --exclude="*/depend" $(NAME) .PHONY: tag-release tag-release: RELEASE r=`head -1 RELEASE | sed -e s/\\\./-/g`; cd ..; cvs tag -F $(NAME)-$$r $(NAME) .PHONY: release release: distclean $(MAKE) tag-release $(MAKE) dist .ml.cmx: $(OCAMLOPT) -c $< .ml.cmo: $(OCAMLC) -c $< .mli.cmi: $(OCAMLC) -c $< .SUFFIXES: .cmo .cmi .cmx .ml .mli include depend #include depend.pkg xstr/META100644 764 144 132 7512022602 11024 0ustar gerdusersversion = "0.2.1" requires = "" archive(byte) = "xstr.cma" archive(native) = "xstr.cmxa" xstr/speed/ 40755 764 144 0 6740731237 11417 5ustar gerdusersxstr/speed/with_xstr.ml100644 764 144 10741 6740726004 14120 0ustar gerdusersopen Xstr_search open Xstr_split open Xstr_match let invoke f name n x = let t1 = Sys.time() in for i = 1 to n do f x done; let t2 = Sys.time() in Printf.printf "Test %s lasts %f seconds\n" name ((t2 -. t1) /. float n); flush stdout ;; (* TEST DATA *) let rec rep n = if n = 0 then [] else "mouse" :: rep (n-1);; let s1 = String.make 5000 ' ' ^ "mouse" ^ String.make 5000 ' ';; let s2 = String.make 25 ' ' ^ "mouse" ^ String.make 25 ' ';; let s3 = String.make 500 '0' ^ "0b10010";; let s4 = String.concat " " (rep 1000);; let s5 = String.concat " " (rep 10);; let s6 = String.concat "\\" (rep 1000);; let s7 = String.concat "\\" (rep 10);; (* PATTERN MATCHING: *) (* Test 1.1. * Find a word in a big string (10K) *) let test_1_1() = let v = var "" in let cs = mknegset " " in let ml = [ Lazystring; Record (v,[Anychar_from cs; Anystring_from cs]); Anystring ] in let _ = match_string ml s1 in if found_string_of_var v <> "mouse" then failwith "Bad result" ;; (* Test 1.2. * Find a word in a small string (50 bytes) *) let test_1_2() = let v = var "" in let cs = mknegset " " in let ml = [ Lazystring; Record (v,[Anychar_from cs; Anystring_from cs]); Anystring ] in let _ = match_string ml s2 in if found_string_of_var v <> "mouse" then failwith "Bad result" ;; (* Test 1.3. * Backtracking test: Find either hexadecimal, octal, or * binary constants in a string of digits *) let test_1_3() = let v = var "" in let cshex = mkset "0-9a-f" in let csoct = mkset "0-7" in let csbin = mkset "0-1" in let ml = [ Lazystring; Record (v, [ Alternative [ [ Literal "0x"; Anychar_from cshex; Anystring_from cshex ]; [ Literal "0o"; Anychar_from csoct; Anystring_from csoct ]; [ Literal "0b"; Anychar_from csbin; Anystring_from csbin ] ]]); ] in let _ = match_string ml s3 in if found_string_of_var v <> "0b10010" then failwith "Bad result" ;; (* REPLACE PATTERNS *) (* Test 2.1: * Replace a word in a big string *) let test_2_1() = let s1 = String.make 5000 ' ' ^ "mouse" ^ String.make 5000 ' ' in let cs = mknegset " " in let ml = [ Anychar_from cs; Anystring_from cs ] in let (r,_) = replace_matched_substrings ml [ ReplaceLiteral "cat" ] [] s1 in if String.sub r 4999 5 <> " cat " then failwith "Bad result" ;; (* Test 2.2: * Replace lots of words in a big string *) let test_2_2() = let cs = mknegset " " in let ml = [ Anychar_from cs; Anystring_from cs ] in let (r,_) = replace_matched_substrings ml [ ReplaceLiteral "cat" ] [] s4 in if String.sub r (25*4-1) 5 <> " cat " then failwith "Bad result" ;; (* SPLITTING *) (* Test 3.1: * Split a big string into space-separated words *) let test_3_1() = let r = split_string "" false false [" "] s4 in if List.hd r <> "mouse" then failwith "Bad result" ;; (* Test 3.2: * Split a small string into words *) let test_3_2() = let r = split_string "" false false [" "] s5 in if List.hd r <> "mouse" then failwith "Bad result" ;; (* SUBSTRING SEARCHING *) (* Test 4.1: * Find a certain substring *) let test_4_1() = let k = index_of_substring s1 "mouse" in if k <> 5000 then failwith "Bad result" ;; (* CHARCTER REPLACING *) (* Test 5.1: * Unquote backslashes in a big string *) let test_5_1() = let s = s6 in let l = String.length s in let r = replace_char s (fun c k -> match c with '\\' -> begin if k+1 < l then raise (Replace_phrase (2, String.make 1 (s.[k+1]))) else raise Not_found end) in if String.sub r 0 10 <> "mousemouse" then failwith "Bad result" ;; (* Test 5.2: * Unquote backslashes in a small string *) let test_5_2() = let s = s7 in let l = String.length s in let r = replace_char s (fun c k -> match c with '\\' -> begin if k+1 < l then raise (Replace_phrase (2, String.make 1 (s.[k+1]))) else raise Not_found end) in if String.sub r 0 10 <> "mousemouse" then failwith "Bad result" ;; (********* invoke tests ************) invoke test_1_1 "Pattern matching 1" 100 (); invoke test_1_2 "Pattern matching 2" 10000 (); invoke test_1_3 "Pattern matching 3" 1000 (); invoke test_2_1 "Pattern replacing 1" 100 (); invoke test_2_2 "Pattern replacing 2" 100 (); invoke test_3_1 "Splitting 1" 100 (); invoke test_3_2 "Splitting 2" 1000 (); invoke test_4_1 "Substring searching" 1000 (); invoke test_5_1 "Unquoting 1" 100 (); invoke test_5_2 "Unquoting 2" 10000 (); () xstr/speed/Makefile100644 764 144 1237 6740726314 13157 0ustar gerdusersall: with_xstr_byte with_xstr_opt with_str_byte with_str_opt clean: rm -f *.cmi *.cmo *.cmx *.o gmon.out rm -f with_str_byte with_str_opt with_xstr_byte with_xstr_opt rm -f with_xstr_opt_p with_xstr_byte: ../xstr.cma with_xstr.ml ocamlc -o with_xstr_byte -I .. xstr.cma with_xstr.ml with_xstr_opt: ../xstr.cmxa with_xstr.ml ocamlopt -o with_xstr_opt -I .. xstr.cmxa with_xstr.ml with_xstr_opt_p: ../xstr.cmxa with_xstr.ml ocamlopt -o with_xstr_opt_p -p -I .. xstr.cmxa with_xstr.ml with_str_byte: with_str.ml ocamlc -o with_str_byte -custom str.cma with_str.ml -cclib -lstr with_str_opt: with_str.ml ocamlopt -o with_str_opt str.cmxa with_str.ml -cclib -lstr xstr/speed/with_str.ml100644 764 144 6571 6740726027 13723 0ustar gerdusersopen Str let invoke f name n x = let t1 = Sys.time() in for i = 1 to n do f x done; let t2 = Sys.time() in Printf.printf "Test %s lasts %f seconds\n" name ((t2 -. t1) /. float n); flush stdout ;; (* TEST DATA *) let rec rep n = if n = 0 then [] else "mouse" :: rep (n-1);; let s1 = String.make 5000 ' ' ^ "mouse" ^ String.make 5000 ' ';; let s2 = String.make 25 ' ' ^ "mouse" ^ String.make 25 ' ';; let s3 = String.make 500 '0' ^ "0b10010";; let s4 = String.concat " " (rep 1000);; let s5 = String.concat " " (rep 10);; let s6 = String.concat "\\" (rep 1000);; let s7 = String.concat "\\" (rep 10);; (* PATTERN MATCHING: *) (* Test 1.1. * Find a word in a big string (10K) *) let test_1_1() = let test_1_1_re = regexp "[^ ]+" in let _ = search_forward test_1_1_re s1 0 in if matched_string s1 <> "mouse" then failwith "Bad result" ;; (* Test 1.2. * Find a word in a small string (50 bytes) *) let test_1_2() = let test_1_2_re = regexp "[^ ]+" in let _ = search_forward test_1_2_re s2 0 in if matched_string s2 <> "mouse" then failwith "Bad result" ;; (* Test 1.3. * Backtracking test: Find either hexadecimal, octal, or * binary constants in a string of digits *) let test_1_3() = let test_1_3_re = regexp "0x[0-9a-f]+\\|0o[0-7]+\\|0b[0-1]+" in let _ = search_forward test_1_3_re s3 0 in if matched_string s3 <> "0b10010" then failwith "Bad result" ;; (* Test 2.1: * Replace a word in a big string *) let test_2_1() = let test_2_1_re = regexp "[^ ]+" in let r = global_replace test_2_1_re "cat" s1 in if String.sub r 4999 5 <> " cat " then failwith "Bad result" ;; (* Test 2.2: * Replace lots of words in a big string *) let test_2_2() = let test_2_2_re = regexp "[^ ]+" in let r = global_replace test_2_2_re "cat" s4 in if String.sub r (25*4-1) 5 <> " cat " then failwith "Bad result" ;; (* Test 3.1: * Split a big string into space-separated words *) let test_3_1() = let r = split_delim (regexp " ") s4 in if List.hd r <> "mouse" then failwith "Bad result" ;; (* Test 3.2: * Split a small string into words *) let test_3_2() = let r = split_delim (regexp " ") s5 in if List.hd r <> "mouse" then failwith "Bad result" ;; (* Test 4.1: * Find a certain substring *) let test_4_1() = let k = search_forward (regexp (quote "mouse")) s1 0 in if k <> 5000 then failwith "Bad result" ;; (* Test 5.1: * Unquote backslashes in a big string *) let test_5_1() = let s = s6 in let r = global_substitute (regexp "\\\\.") (fun s -> String.make 1 (s.[match_beginning()+1])) s in if String.sub r 0 10 <> "mousemouse" then failwith "Bad result" ;; (* Test 5.2: * Unquote backslashes in a small string *) let test_5_2() = let s = s7 in let r = global_substitute (regexp "\\\\.") (fun s -> String.make 1 (s.[match_beginning()+1])) s in if String.sub r 0 10 <> "mousemouse" then failwith "Bad result" ;; (********* invoke tests ************) invoke test_1_1 "Pattern matching 1" 100 (); invoke test_1_2 "Pattern matching 2" 10000 (); invoke test_1_3 "Pattern matching 3" 1000 (); invoke test_2_1 "Pattern replacing 1" 100 (); invoke test_2_2 "Pattern replacing 2" 100 (); invoke test_3_1 "Splitting 1" 100 (); invoke test_3_2 "Splitting 2" 1000 (); invoke test_4_1 "Substring searching" 1000 (); invoke test_5_1 "Unquoting 1" 100 (); invoke test_5_2 "Unquoting 2" 10000 (); () xstr/LICENSE100644 764 144 2051 6740731057 11417 0ustar gerdusersCopyright 1999 by Gerd Stolpmann The package "xstr" is copyright by Gerd Stolpmann. Permission is hereby granted, free of charge, to any person obtaining a copy of the "xstr" software (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. The Software is provided ``as is'', without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose and noninfringement. In no event shall Gerd Stolpmann be liable for any claim, damages or other liability, whether in an action of contract, tort or otherwise, arising from, out of or in connection with the Software or the use or other dealings in the software.