pax_global_header00006660000000000000000000000064122512200520014501gustar00rootroot0000000000000052 comment=ea743af7bd44719f3f9758805028d1e016fc57a5 approx-5.4/000077500000000000000000000000001225122005200126625ustar00rootroot00000000000000approx-5.4/Makefile000066400000000000000000000020451225122005200143230ustar00rootroot00000000000000# approx: proxy server for Debian archive files # Copyright (C) 2013 Eric C. Cooper # Released under the GNU General Public License OCAMLBUILD := ocamlbuild OCAMLBUILD_OPTS := -classic-display TARGET := native ifeq ($(TARGET),byte) OCAMLBUILD_OPTS += -byte-plugin endif programs = approx approx-gc approx-import all: $(programs) approx: $(OCAMLBUILD) $(OCAMLBUILD_OPTS) approx.$(TARGET) cp -p _build/approx.$(TARGET) $@ approx-gc: $(OCAMLBUILD) $(OCAMLBUILD_OPTS) gc_cache.$(TARGET) cp -pv _build/gc_cache.$(TARGET) $@ approx-import: $(OCAMLBUILD) $(OCAMLBUILD_OPTS) import.$(TARGET) cp -pv _build/import.$(TARGET) $@ $(programs): $(wildcard *.ml*) clean: $(OCAMLBUILD) $(OCAMLBUILD_OPTS) -clean rm -f $(programs) .PHONY: tests tests: $(subst .ml,,$(wildcard tests/*.ml)) %_test: $(OCAMLBUILD) $(OCAMLBUILD_OPTS) $@.$(TARGET) version := $(shell sed -n 's/^let version = "\(.*\)"$$/\1/p' config.ml) package := approx-$(version) tarball := $(package).tar.gz tarball: git archive -o $(tarball) --prefix $(package)/ HEAD approx-5.4/_tags000066400000000000000000000010631225122005200137020ustar00rootroot00000000000000# approx: proxy server for Debian archive files # Copyright (C) 2013 Eric C. Cooper # Released under the GNU General Public License <**/*.{mli,ml}>: warn_error_A : use_pcre, use_netstring, use_netcgi, use_nethttpd : use_pcre : use_pcre : use_netsys : use_netstring : use_pcre, use_sha <**/*.{byte,native}>: use_unix, use_pcre, use_sha <*.{byte,native}>: use_bigarray, use_netsys_oothr, use_netsys, use_str, use_netstring : use_equeue, use_netcgi, use_nethttpd approx-5.4/approx.ml000066400000000000000000000441101225122005200145250ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2013 Eric C. Cooper Released under the GNU General Public License *) open Printf open Unix open Unix.LargeFile open Config open Log open Program open Util (* Hint that a download is in progress *) let in_progress name = name ^ ".hint" let wait_for_download_in_progress name = let hint = in_progress name in let timeout = float_of_int max_wait in let rec wait n = match stat_file hint with | Some { st_mtime = mtime } -> if time () -. mtime > timeout then begin error_message "Concurrent download of %s is taking too long" name; (* remove the other process's hint file if it still exists, so we can create our own *) rm hint end else begin if n = 0 then debug_message "Waiting for concurrent download of %s" name; sleep 1; wait (n + 1) end | None -> () in wait 0 let debug_headers msg headers = debug_message "%s" msg; List.iter (fun (x, y) -> debug_message " %s: %s" x y) headers let proxy_headers size modtime = let headers = ["Content-Type", "text/plain"; "Content-Length", Int64.to_string size] in if modtime = 0. then headers else ("Last-Modified", Url.string_of_time modtime) :: headers type local_status = | Done of Nethttpd_types.http_service_reaction | Cache_miss of float let head_request env = env#cgi_request_method = "HEAD" (* Deliver a file from the local cache *) let deliver_local name env = debug_message " => delivering from cache"; let size = file_size name in env#set_output_header_fields (proxy_headers size (file_modtime name)); debug_headers "Local response" env#output_header_fields; let file = if head_request env then "/dev/null" else cache_dir ^/ name in Done (`File (`Ok, None, file, 0L, size)) let not_modified () = debug_message " => not modified"; Done (`Std_response (`Not_modified, None, None)) let nak () = debug_message " => not found (cached)"; Done (`Std_response (`Not_found, None, None)) (* The modification time (mtime) tells when the contents of the file last changed, and is used by the "If-Modified-Since" logic. The last status change time (ctime) is used to indicate when a file was last "verified" by contacting the remote repository. Whenever we learn that the file is still valid via a "Not Modified" response, we update the ctime so that the file will continue to be considered current. *) let print_age mod_time ctime = if debug then begin debug_message " last modified: %s" (Url.string_of_time mod_time); debug_message " last verified: %s" (Url.string_of_time ctime) end (* "File not found" or NAK responses are cached as empty files with permissions = 0. Create a cached NAK as an empty temp file, set its permissions, then atomically rename it. *) let cache_nak file = debug_message " caching \"file not found\""; make_directory (Filename.dirname file); let tmp_file = gensym file in let chan = open_out_excl tmp_file in close_out chan; Unix.chmod tmp_file 0; Sys.rename tmp_file file (* Attempt to serve the requested file from the local cache. Deliver immutable files and valid index files from the cache. Deliver Release files if they are not too old. Otherwise contact the remote repository. *) let serve_local name ims env = wait_for_download_in_progress name; match stat_file name with | Some { st_mtime = mod_time; st_ctime = ctime; st_size = size; st_perm = perm } -> let deliver_if_newer () = if mod_time > ims then deliver_local name env else not_modified () in if size = 0L && perm = 0 then begin (* cached NAK *) debug_message " cached \"file not found\""; print_age mod_time ctime; if minutes_old ctime <= interval then nak () else Cache_miss mod_time end else if Release.is_release name then begin print_age mod_time ctime; if minutes_old ctime <= interval then deliver_if_newer () else Cache_miss mod_time end else if Release.immutable name || Release.valid name then deliver_if_newer () else Cache_miss 0. | None -> Cache_miss 0. let create_hint name = make_directory (Filename.dirname name); close (openfile (in_progress name) [O_CREAT; O_WRONLY] 0o644) let remove_hint name = rm (in_progress name) type cache_info = { file : string; tmp_file : string; chan : out_channel } type cache_state = | Cache of cache_info | Pass_through | Undefined (* Don't cache the result of a request for a directory *) let should_pass_through name = if Sys.file_exists name then Sys.is_directory name else let n = String.length name in n = 0 || name.[n - 1] = '/' || not (String.contains name '/') let open_cache file = if should_pass_through file then begin debug_message " pass-through %s" file; Pass_through end else try debug_message " open cache %s" file; make_directory (Filename.dirname file); let tmp_file = gensym file in let chan = open_out_excl tmp_file in Cache { file = file; tmp_file = tmp_file; chan = chan } with e -> error_message "Cannot cache %s" file; raise e let write_cache cache str pos len = match cache with | Cache { chan = chan } -> output chan str pos len | Pass_through -> () | Undefined -> assert false exception Wrong_size let close_cache cache size mod_time = match cache with | Cache { file = file; tmp_file = tmp_file; chan = chan } -> debug_message " close cache %s" file; close_out chan; if size = -1L || size = file_size tmp_file then begin if mod_time <> 0. then begin debug_message " setting mtime to %s" (Url.string_of_time mod_time); utimes tmp_file mod_time mod_time end; Sys.rename tmp_file file end else begin error_message "Size of %s should be %Ld, not %Ld" file size (file_size tmp_file); rm tmp_file; raise Wrong_size end | Pass_through -> () | Undefined -> assert false let remove_cache cache = match cache with | Cache { tmp_file = tmp_file; chan = chan } -> close_out chan; error_message "Removing %s (size: %Ld)" tmp_file (file_size tmp_file); rm tmp_file | Pass_through | Undefined -> () type download_status = | Delivered | Cached | Not_modified | Redirect of string | File_not_found let string_of_download_status = function | Delivered -> "delivered" | Cached -> "cached" | Not_modified -> "not modified" | Redirect url -> "redirected to " ^ url | File_not_found -> "not found" type response_state = { name : string; mutable status : int; mutable length : int64; mutable last_modified : float; mutable location : string; mutable content_type : string; mutable body_seen : bool; mutable cache : cache_state } let new_response url name = { name = name; status = 0; length = -1L; last_modified = 0.; location = url; content_type = "text/plain"; body_seen = false; cache = Undefined } type cgi = Netcgi.cgi_activation let send_header size modtime (cgi : cgi) = let headers = proxy_headers size modtime in let fields = List.map (fun (name, value) -> (name, [value])) headers in cgi#set_header ~status: `Ok ~fields (); debug_headers "Proxy response" cgi#environment#output_header_fields let pass_through_header resp (cgi : cgi) = let fields = ["Content-Type", [resp.content_type]] in let fields = if resp.length < 0L then fields else ("Content-Length", [Int64.to_string resp.length]) :: fields in cgi#set_header ~status: `Ok ~fields (); debug_headers "Pass-through response" cgi#environment#output_header_fields let finish_delivery resp = close_cache resp.cache resp.length resp.last_modified; if resp.length >= 0L || resp.cache = Pass_through then Delivered else Cached let finish_head resp cgi = send_header resp.length resp.last_modified cgi; Delivered let with_pair rex str proc = match Pcre.extract ~rex ~full_match: false str with | [| a; b |] -> proc (a, b) | _ -> assert false let status_re = Pcre.regexp "^HTTP/\\d+\\.\\d+\\s+(\\d{3})\\s+(.*?)\\s*$" let header_re = Pcre.regexp "^(.*?):\\s*(.*?)\\s*$" let process_header resp str = let do_status (code, _) = resp.status <- int_of_string code in let do_header (header, value) = match String.lowercase header with | "content-length" -> (try resp.length <- Int64.of_string value with Failure _ -> error_message "Cannot parse Content-Length %s" value) | "last-modified" -> (try resp.last_modified <- Url.time_of_string value with Invalid_argument _ -> error_message "Cannot parse Last-Modified date %s" value) | "location" -> (try resp.location <- Neturl.string_of_url (Neturl.parse_url value) with Neturl.Malformed_URL -> error_message "Cannot parse Location %s" value) | "content-type" -> (* only used for pass-through content *) resp.content_type <- value | _ -> () in debug_message " %s" str; try with_pair header_re str do_header with Not_found -> (* e.g., status line or CRLF *) try with_pair status_re str do_status with Not_found -> error_message "Unrecognized response: %s" str (* Process a chunk of the response body. If no Content-Length was present in the header, we cache the whole file before delivering it to the client. The alternative -- using chunked transfer encoding -- triggers a bug in APT. *) let process_body resp cgi str pos len = if resp.status = 200 then begin if not resp.body_seen then begin resp.body_seen <- true; assert (resp.cache = Undefined); resp.cache <- open_cache resp.name; if resp.cache = Pass_through then pass_through_header resp cgi else if resp.length >= 0L then send_header resp.length resp.last_modified cgi end; write_cache resp.cache str pos len; if resp.length >= 0L || resp.cache = Pass_through then (* stream the data back to the client as we receive it *) cgi#output#really_output str pos len end (* Download a file from an HTTP or HTTPS repository *) let download_http resp url name ims cgi = let headers = if ims > 0. then ["If-Modified-Since: " ^ Url.string_of_time ims] else [] in let header_callback = process_header resp in let body_callback = process_body resp cgi in let is_head = head_request cgi#environment in let rec loop redirects = resp.status <- 0; if is_head then Url.head resp.location header_callback else Url.download resp.location ~headers ~header_callback body_callback; match resp.status with | 200 -> if is_head then finish_head resp cgi else finish_delivery resp | 304 -> Not_modified | 301 | 302 | 303 | 307 -> if should_pass_through (relative_url resp.location) then begin (* the request was redirected to content that should not be cached, like a directory listing *) remove_cache resp.cache; Redirect resp.location end else if redirects >= max_redirects then begin error_message "Too many redirections for %s" url; File_not_found end else loop (redirects + 1) | 404 -> File_not_found | n -> error_message "Unexpected status code: %d" n; File_not_found in loop 0 (* Download a file from an FTP repository *) let download_ftp resp url name ims cgi = Url.head url (process_header resp); let mod_time = resp.last_modified in debug_message " ims %s mtime %s" (Url.string_of_time ims) (Url.string_of_time mod_time); if 0. < mod_time && mod_time <= ims then Not_modified else if head_request cgi#environment then finish_head resp cgi else begin resp.status <- 200; (* for process_body *) Url.download url (process_body resp cgi); finish_delivery resp end let download_url url name ims cgi = let dl = match Url.protocol url with | Url.HTTP | Url.HTTPS -> download_http | Url.FTP | Url.FILE -> download_ftp in let resp = new_response url name in try create_hint name; unwind_protect (fun () -> dl resp url name ims cgi) (fun () -> remove_hint name) with e -> remove_cache resp.cache; if e <> Failure url then info_message "%s" (string_of_exception e); File_not_found (* Handle any processing triggered by downloading a given file *) let updates_needed = ref [] let cleanup_after url file = if pdiffs && Release.is_pdiff file then (* record the affected index for later update *) let index = Pdiff.index_file file in if not (List.mem index !updates_needed) then begin debug_message "Deferring pdiffs for %s" index; updates_needed := index :: !updates_needed end let copy_to dst src = let len = 4096 in let buf = String.create len in let rec loop () = match input src buf 0 len with | 0 -> () | n -> dst#really_output buf 0 n; loop () in loop () (* Similar to deliver_local, but we have to copy it ourselves *) let copy_from_cache name cgi = wait_for_download_in_progress name; send_header (file_size name) (file_modtime name) cgi; let output = cgi#output in if not (head_request cgi#environment) then with_in_channel open_in name (copy_to output); output#commit_work () (* Update the ctime but not the mtime of the file *) let update_ctime name = match stat_file name with | Some stats -> utimes name stats.st_atime stats.st_mtime; if debug then let ctime = (stat name).st_ctime in debug_message " updated ctime to %s" (Url.string_of_time ctime) | None -> () let redirect url (cgi : cgi) = let url' = try let path = Url.reverse_translate url in cgi#url ~with_script_name: `None ~with_path_info: (`This path) () with Not_found -> url in new Netmime.basic_mime_header ["Location", url'] let serve_remote url name ims mod_time cgi = let respond ?header code = raise (Nethttpd_types.Standard_response (code, header, None)) in let copy_if_newer () = (* deliver the cached copy if it is newer than the client's *) if mod_time > ims then copy_from_cache name cgi else respond `Not_modified in let status = download_url url name (max ims mod_time) cgi in info_message "%s: %s" url (string_of_download_status status); match status with | Delivered -> cgi#output#commit_work (); if not (head_request cgi#environment) then cleanup_after url name | Cached -> copy_from_cache name cgi; cleanup_after url name | Not_modified -> update_ctime name; copy_if_newer () | Redirect url' -> respond `Found ~header: (redirect url' cgi) | File_not_found -> if is_cached_nak name then begin update_ctime name; respond `Not_found end else if offline && Sys.file_exists name then copy_if_newer () else begin cache_nak name; respond `Not_found end let remote_service url name ims mod_time = object method process_body _ = object method generate_response env = let cgi = (* buffered activation runs out of memory on large downloads *) Nethttpd_services.std_activation `Std_activation_unbuffered env in serve_remote url name ims mod_time cgi end end (* Handle a cache miss, either because the file is not present (mod_time = 0) or it hasn't been verified recently enough *) let cache_miss url name ims mod_time = debug_message " => cache miss"; `Accept_body (remote_service url name ims mod_time) let ims_time env = try Netdate.parse_epoch (env#input_header#field "If-Modified-Since") with Not_found | Invalid_argument _ -> 0. let server_error e = backtrace (); `Std_response (`Internal_server_error, None, Some (string_of_exception e)) let static env str = `Static (`Ok, None, if head_request env then "" else str) let serve_file env = (* handle URL-encoded '+', '~', etc. *) match Netencoding.Url.decode ~plus: false env#cgi_request_uri with | "/" -> static env Config.index | "/robots.txt" -> static env "User-agent: *\nDisallow: /\n" | path -> begin try let url, name = Url.translate_request path in if should_pass_through name then cache_miss url name 0. 0. else let ims = ims_time env in match serve_local name ims env with | Done reaction -> reaction | Cache_miss mod_time -> cache_miss url name ims mod_time with | Not_found -> `Std_response (`Not_found, None, None) | e -> server_error e end let process_request env = debug_message "Connection from %s" (string_of_sockaddr env#remote_socket_addr ~with_port: true); let meth = env#cgi_request_method in debug_headers (sprintf "Request: %s %s" meth env#cgi_request_uri) env#input_header_fields; if (meth = "GET" || meth = "HEAD") && env#cgi_query_string = "" then serve_file env else `Std_response (`Forbidden, None, Some "invalid HTTP request") let error_response info = let code = info#response_status_code in let msg = string_of_int code ^ ": " ^ try Nethttp.string_of_http_status (Nethttp.http_status_of_int code) with Not_found -> "???" in let detail = match info#error_message with | "" -> "" | s -> "

" ^ s ^ "

" in sprintf "

%s

%s" msg detail open Nethttpd_reactor let config = object inherit modify_http_reactor_config default_http_reactor_config (* changes from default_http_protocol_config *) method config_announce_server = `Ocamlnet_and ("approx/" ^ version) (* changes from default_http_processor_config *) method config_error_response = error_response method config_log_error _ msg = error_message "%s" msg end let proxy_service = object method name = "proxy_service" method def_term = `Proxy_service method print fmt = Format.fprintf fmt "%s" "proxy_service" method process_header = process_request end let approx () = log_to_syslog (); check_id ~user ~group; Sys.chdir cache_dir; set_nonblock stdin; Nethttpd_reactor.process_connection config stdin proxy_service; List.iter Pdiff.update !updates_needed let () = main_program approx () approx-5.4/config.ml000066400000000000000000000072671225122005200144750ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2013 Eric C. Cooper Released under the GNU General Public License *) open Config_file open Util let version = "5.4" let default_config = "/etc/approx/approx.conf" let extract_config_files () = let rec loop configs args = function | "-c" :: f :: rest | "--config" :: f :: rest -> loop (f :: configs) args rest | x :: rest -> loop configs (x :: args) rest | [] -> List.rev configs, List.rev args in loop [default_config] [] (List.tl (Array.to_list Sys.argv)) let config_files, arguments = extract_config_files () let server_config = ["version", version; "host", Unix.gethostname (); "config", String.concat " " config_files] let () = List.iter (fun file -> try read file with Sys_error _ -> ()) config_files let params = [] let cache_dir = get "$cache" ~default: "/var/cache/approx" let params = ("$cache", cache_dir) :: params let split_cache_path path = let err () = invalid_string_arg "split_cache_path" path in if is_prefix cache_dir path then let i = String.length cache_dir + 1 in let j = try String.index_from path i '/' with Not_found -> err () in substring path ~from: i ~until: j, substring path ~from: (j + 1) else err () let shorten path = if is_prefix cache_dir path then substring path ~from: (String.length cache_dir + 1) else path let interval = get_int "$interval" ~default: 60 let params = ("$interval", string_of_int interval) :: params let max_rate = get "$max_rate" ~default: "unlimited" let params = ("$max_rate", max_rate) :: params let max_redirects = get_int "$max_redirects" ~default: 5 let params = ("$max_redirects", string_of_int max_redirects) :: params let user = get "$user" ~default: "approx" let params = ("$user", user) :: params let group = get "$group" ~default: "approx" let params = ("$group", group) :: params let syslog = get "$syslog" ~default: "daemon" let params = ("$syslog", syslog) :: params let pdiffs = get_bool "$pdiffs" ~default: true let params = ("$pdiffs", string_of_bool pdiffs) :: params let offline = get_bool "$offline" ~default: false let params = ("$offline", string_of_bool offline) :: params let max_wait = get_int "$max_wait" ~default: 10 (* seconds *) let params = ("$max_wait", string_of_int max_wait) :: params let debug = get_bool "$debug" ~default: false let params = ("$debug", string_of_bool debug) :: params let verbose = get_bool "$verbose" ~default: false || debug let params = ("$verbose", string_of_bool verbose) :: params let repos = fold (fun k v l -> if k.[0] <> '$' then (k, v) :: l else l) [] let sort_config = List.sort (fun x y -> compare (fst x) (fst y)) let section str = "

" ^ str ^ "

\n" let repository_table items = String.concat "" (List.map (fun (k, v) -> "" ^ k ^ "\ " ^ v ^ "\n") (sort_config items)) let parameter_table items = String.concat "" (List.map (fun (k, v) -> "" ^ k ^ "" ^ v ^ "\n") (sort_config items)) let css = "body { margin: 24pt }\n\ td { padding-right: 18pt }\n\ td h2 { padding-top: 18pt }\n" let index = "\n\ \n\ approx server\n\ \n\ \n\ \n\ \n" ^ section "approx server" ^ parameter_table server_config ^ section "Repository Mappings" ^ repository_table repos ^ section "Configuration Parameters" ^ parameter_table params ^ "
\n\ \n\ " approx-5.4/config.mli000066400000000000000000000017151225122005200146360ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2011 Eric C. Cooper Released under the GNU General Public License *) val version : string val arguments : string list (* without config file args *) val cache_dir : string val interval : int (* minutes *) val max_rate : string (* bytes/second with optional K, M, or G suffix *) val max_redirects : int val user : string val group : string val syslog : string val pdiffs : bool val offline : bool val max_wait : int (* seconds *) val verbose : bool val debug : bool val index : string (* simple HTML index for the server *) (* Extract the distribution and relative filename from the absolute pathname of a file in the cache. Example: split_pathname "/var/cache/approx/debian/pool/main/..." returns ("debian", "pool/main/...") *) val split_cache_path : string -> string * string (* Remove cache directory prefix from a pathname, if present *) val shorten : string -> string approx-5.4/config_file.ml000066400000000000000000000032211225122005200154560ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2009 Eric C. Cooper Released under the GNU General Public License *) open Util let lines_of_channel chan = let next () = try Some (input_line chan) with End_of_file -> None in let rec loop list = match next () with | Some line -> loop (line :: list) | None -> List.rev list in loop [] let comment_re = Pcre.regexp "\\s*#.*$" let remove_comment str = Pcre.qreplace ~rex: comment_re str ~templ: "" let words_of_line line = Pcre.split (remove_comment line) let map = ref [] let mem k = List.mem_assoc k !map let get_generic convert ?default k = try convert (List.assoc k !map) with Not_found -> (match default with | Some v -> v | None -> raise Not_found) let get = get_generic (fun x -> x) let get_int = get_generic int_of_string let bool_of_string str = match String.lowercase str with | "true" | "yes" | "on" | "1" -> true | "false" | "no" | "off" | "0" -> false | _ -> failwith ("not a boolean value: " ^ str) let get_bool = get_generic bool_of_string let set key value = map := (key, value) :: !map let fold f init = List.fold_left (fun x (k, v) -> f k v x) init !map let iter f = fold (fun k v () -> f k v) () let read filename = let read_file chan = let lines = List.map words_of_line (lines_of_channel chan) in close_in chan; let enter = function | [key; value] -> set key value | [] -> () | words -> failwith ("malformed line in " ^ filename ^ ": " ^ String.concat " " words) in List.iter enter lines in with_in_channel open_in filename read_file approx-5.4/config_file.mli000066400000000000000000000006441225122005200156350ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2011 Eric C. Cooper Released under the GNU General Public License *) val read : string -> unit val get : ?default:string -> string -> string val get_int : ?default:int -> string -> int val get_bool : ?default:bool -> string -> bool val fold : (string -> string -> 'a -> 'a) -> 'a -> 'a val iter : (string -> string -> unit) -> unit approx-5.4/control_file.ml000066400000000000000000000123341225122005200156760ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2011 Eric C. Cooper Released under the GNU General Public License *) open Config open Log open Util type paragraph = { file : string; line : int; fields : (string * string) list } exception Missing of paragraph * string let defined name par = List.mem_assoc name par.fields let lookup name par = try List.assoc name par.fields with Not_found -> raise (Missing (par, name)) let file_name par = par.file let line_number par = par.line let iter_fields proc par = List.iter proc par.fields let trim_left s i = let n = String.length s in let rec loop i = if i < n && (s.[i] = ' ' || s.[i] = '\t') then loop (i + 1) else i in loop i let rec trim_right s i = let rec loop i = if i > 0 && (s.[i - 1] = ' ' || s.[i - 1] = '\t') then loop (i - 1) else i in loop i let trim s = substring s ~until: (trim_right s (String.length s)) let parse line = try let i = String.index line ':' in let name = String.lowercase (substring line ~until: (trim_right line i)) in let info = substring line ~from: (trim_left line (i + 1)) in name, info with _ -> failwith ("malformed line: " ^ line) let next_line chan = try Some (trim (input_line chan)) with End_of_file -> None (* Check if a file is a signed control file *) let is_signed file = Filename.basename file = "InRelease" (* Check the initial lines of a cleartext signed message (as defined in RFC 4880) and return the new line number *) let skip_initial_lines chan = let is_hash line = is_prefix "Hash:" line in let rec loop n = match next_line chan with | None -> failwith "EOF in PGP header" | Some "" -> n + 1 | Some line -> if is_hash line then loop (n + 1) else failwith ("unexpected line in PGP header: " ^ line) in begin match next_line chan with (* line 1 *) | Some "-----BEGIN PGP SIGNED MESSAGE-----" -> () | _ -> failwith "missing PGP header" end; begin match next_line chan with (* line 2 *) | None -> failwith "EOF in PGP header" | Some line -> if not (is_hash line) then failwith "missing Hash in PGP header" end; loop 3 let rec skip_final_lines chan = match next_line chan with | None -> () | Some _ -> skip_final_lines chan let read_paragraph file n chan = let rec loop lines i j = match next_line chan with | None -> if lines <> [] then lines, i, j + 1 else raise End_of_file | Some "-----BEGIN PGP SIGNATURE-----" when is_signed file -> if lines <> [] then begin skip_final_lines chan; lines, i, j + 1 end else raise End_of_file | Some "" -> if lines <> [] then lines, i, j + 1 else loop [] (i + 1) (j + 1) | Some line -> if line.[0] = ' ' || line.[0] = '\t' then match lines with | last :: others -> let line = if line = " ." then "" else substring line ~from: 1 in loop ((last ^ "\n" ^ line) :: others) i (j + 1) | [] -> failwith ("leading white space: " ^ line) else loop (line :: lines) i (j + 1) in let n = if n = 1 && is_signed file then skip_initial_lines chan else n in let fields, i, j = loop [] n n in { file = file; line = i; fields = List.rev_map parse fields }, j let fold f init file = let read_file chan = let next n = try Some (read_paragraph file n chan) with End_of_file -> None in let rec loop x n = match next n with | Some (p, n') -> loop (f x p) n' | None -> x in loop init 1 in with_in_channel open_file file read_file let iter = iter_of_fold fold let read file = let once prev p = match prev with | None -> Some p | Some _ -> failwith (file ^ " contains more than one paragraph") in match fold once None file with | Some p -> p | None -> failwith (file ^ " contains no paragraphs") let get_checksum par = if defined "sha256" par then lookup "sha256" par, file_sha256sum else if defined "sha1" par then lookup "sha1" par, file_sha1sum else lookup "md5sum" par, file_md5sum type info = string * int64 let info_list data = let lines = match split_lines data with | "" :: lines -> lines | lines -> lines in List.map (fun line -> Scanf.sscanf line "%s %Ld %s" (fun sum size file -> (sum, size), file)) lines let read_checksum_info file = let lines, checksum = get_checksum (read file) in info_list lines, checksum let lookup_info field par = info_list (lookup field par) type validity = | Valid | Wrong_size of int64 | Wrong_checksum of string let validate ?checksum (sum, size) file = let n = file_size file in if n <> size then Wrong_size n else match checksum with | Some file_checksum -> let s = file_checksum file in if s <> sum then Wrong_checksum s else Valid | None -> Valid let valid checksum ((s, n) as info) file = match validate ~checksum info file with | Valid -> true | Wrong_size n' -> debug_message "%s: size %Ld should be %Ld" (shorten file) n' n; false | Wrong_checksum s' -> debug_message "%s: checksum %s should be %s" (shorten file) s' s; false approx-5.4/control_file.mli000066400000000000000000000044361225122005200160530ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2010 Eric C. Cooper Released under the GNU General Public License *) (* The format of Debian control files is defined in http://www.debian.org/doc/debian-policy/ch-controlfields.html *) (* Abstract type respresenting a paragraph in a control file *) type paragraph (* Name of control file from which paragraph was read *) val file_name : paragraph -> string (* Line number at which paragraph starts *) val line_number : paragraph -> int (* Apply a procedure to each (field, value) pair *) val iter_fields : (string * string -> unit) -> paragraph -> unit (* Exception raised when a field lookup fails *) exception Missing of paragraph * string (* Check if a field is present *) val defined : string -> paragraph -> bool (* Find the value corresponding to a field name, or raise Missing *) val lookup : string -> paragraph -> string (* Fold a function over each paragraph in a Debian control file *) val fold : ('a -> paragraph -> 'a) -> 'a -> string -> 'a (* Apply a procedure to each paragraph in a Debian control file *) val iter : (paragraph -> unit) -> string -> unit (* Parse a Debian control file consisting of a single paragraph, such as a Release or DiffIndex file *) val read : string -> paragraph (* Return the strongest checksum information in a paragraph, along with the corresponding checksum function *) val get_checksum : paragraph -> string * (string -> string) (* File information: checksum and size *) type info = string * int64 (* Parse a string consisting of checksum, size, and filename lines *) val info_list : string -> (info * string) list (* Apply info_list to the value of a field *) val lookup_info : string -> paragraph -> (info * string) list (* Read a single-paragraph control file and return a pair consisting of the list of ((checksum, size), filename) lines and the checksum function *) val read_checksum_info : string -> (info * string) list * (string -> string) (* Validate a file's checksum and size *) type validity = | Valid | Wrong_size of int64 | Wrong_checksum of string val validate : ?checksum:(string -> string) -> info -> string -> validity (* Check that a file matches its checksum and size *) val valid : (string -> string) -> info -> string -> bool approx-5.4/doc/000077500000000000000000000000001225122005200134275ustar00rootroot00000000000000approx-5.4/doc/FAQ000066400000000000000000000062261225122005200137670ustar00rootroot00000000000000Changing the location of the approx cache Use the $cache parameter in /etc/approx.conf. Make sure the specified location exists and is owned by the approx user and group. Use a subdirectory for the cache, not a top-level filesystem with a lost+found directory in it, to avoid complaints from the approx-gc program. Changing the temporary directory used by approx The TMPDIR environment variable can be used to specify the temporary directory used by approx. Please see the section on "Passing environment variables to approx", below. Using approx with xinetd An example configuration file is provided in /usr/share/doc/approx/examples/approx.xinetd Exporting a local package repository This is supported with file URLs. Note that the syntax for file URLs requires 3 leading slashes (two for the URL syntax, and one for the root of the pathname). So you can add something like this to /etc/approx/approx.conf: local file:///my/local/repo The repo must have the structure that apt expects, including a Packages.bz2 index. You can maintain a local repo with a tool like dpkg-scanpackages or reprepro. Changing the TCP port on which approx listens Run "dpkg-reconfigure approx" and enter the desired port number when prompted. Changing the IP addresses on which approx listens Add a host address specifier at the beginning of the approx entry in /etc/inetd.conf. See the inetd(8) manual page for details. Controlling access to approx using TCP wrappers (hosts.allow and hosts.deny) The /etc/hosts.allow and /etc/hosts.deny files can be used for host-based control of the approx service. After adding the appropriate entries (see the hosts_access(5) manual page), add the line OPTIONS="-l" to the file /etc/default/openbsd-inetd and then restart openbsd-inetd. Making approx use a proxy for its downloads Since approx uses the curl(1) command to download files from remote repositories, you can use the http_proxy environment variable (or one of the others documented in the curl(1) manual page). Please see the section on "Passing environment variables to approx", below. Handling high client load The openbsd-inetd version of inetd limits the rate of requests by default. See the inetd(8) man page for ways to change the limit. Passing environment variables to approx The openbsd-inetd version of inetd passes only certain environment variables to servers. Here are some workarounds: 1. Modify the inetd.conf entry for approx to run an executable wrapper script instead of the /usr/sbin/approx binary. You can use something like this for the wrapper: #!/bin/sh export MY_ENVIRONMENT_VARIABLE=... exec /usr/sbin/approx 2. Modify the inetd.conf entry for approx to run the "/usr/bin/env" command, with the desired environment variable settings and "/usr/sbin/approx" as arguments. Passing options to the curl(1) command used by approx You can specify options in a .curlrc file in the approx user's home directory (/var/cache/approx/ by default). For example, adding the line "--ipv4" to this file will force curl to resolve hostnames to IPv4 addresses only. approx-5.4/doc/README.concurrency000066400000000000000000000040271225122005200166430ustar00rootroot00000000000000Concurrency control issues in approx Eric Cooper June 2008 The priorities for dealing with concurrency in approx are: 1. ensure correctness (no corrupt files delivered to clients or stored in the cache) 2. maintain good performance (minimize delays due to serialization, minimize number of downloads from remote repositories) There are two sources of potential conflicts: A: between approx and approx-gc When approx-gc runs, we need to ensure that it reads valid Packages files. If it reads a truncated or otherwise corrupt Packages file, it might conclude that many current .debs are not reachable and delete them from the cache. While this is not fatal (it's only a cache, after all), it would have a severe performance impact. But approx downloads partial files with a unique temporary extension, and only renames them upon successful completion. And since the rename is atomic, this potential conflict is a non-problem. Another conflict can occur if approx-gc deletes a temporary file that is in the process of being downloaded on behalf of an approx client. To avoid this, approx-gc doesn't delete recently-modified files. B: between concurrent approx processes The atomic renaming of downloaded files prevents one approx process from delivering a file that has been partially downloaded by another. But suppose a large download, like the main Packages.bz2 file, is in progress. Another approx process might decide to download it also. To avoid this, approx creates a hint file in the cache directory before starting the download. We use the presence of a current hint file as an indication that the download is already in progress, and wait for it (at least for a while). There is still a race condition between the time that an approx process checks for the non-existence of a hint file and the time it creates it, so it is possible for two approx processes to download the same file simultaneously. The atomic rename of unique temporary files ensures correctness in this case, at the cost of some network bandwidth. approx-5.4/doc/approx-gc.8000066400000000000000000000034541225122005200154260ustar00rootroot00000000000000.\" approx: proxy server for Debian archive files .\" Copyright (C) 2011 Eric C. Cooper .\" Released under the GNU General Public License .\" -*- nroff -*- .TH APPROX-GC 8 "May 2011" .\" Please adjust this date whenever revising the manpage. .SH NAME approx-gc \- garbage-collect the cache of Debian archive files .SH SYNOPSIS .PP .B approx-gc [\fIOPTION\fP]... .SH DESCRIPTION .PP .B approx-gc scans the cache created by .BR approx (8) and finds files that are corrupted or no longer needed. With no options specified, these files are listed on standard output and removed from the cache. .PP A corrupted file is one whose size or checksum does not match the value specified in the .I Packages or .I Sources file. .PP An unneeded file is one that is not referenced from any distribution's .I Packages or .I Sources file. .PP .B approx-gc may take several minutes to finish. .SH OPTIONS .TP .BR \-c " file, " \-\^\-config " file" Specify an additional configuration file. May be used multiple times. .TP .BR \-f ", " \-\^\-fast Don't perform checksum validation. .TP .BR \-k ", " \-\^\-keep ", " \-s ", " \-\^\-simulate Don't remove files from the cache. .TP .BR \-q ", " \-\^\-quiet Don't print file names. .TP .BR \-v ", " \-\^\-verbose Print the reason for removal of each file. .SH EXAMPLES .PP To remove all unneeded or corrupted files from the cache: .IP approx-gc \-\^\-quiet .PP This is run as a weekly .BR cron (8) job. .PP To list the files that would be removed from the cache, without actually doing so: .IP approx-gc \-\^\-keep .SH FILES .TP .I /etc/approx/approx.conf .br Configuration file for .B approx and related programs. .TP .I /var/cache/approx .br Default cache directory for archive files. .SH SEE ALSO .IR approx.conf (5), .BR approx (8), .BR cron (8) .SH AUTHOR Eric Cooper approx-5.4/doc/approx-import.8000066400000000000000000000036171225122005200163500ustar00rootroot00000000000000.\" approx: proxy server for Debian archive files .\" Copyright (C) 2010 Eric C. Cooper .\" Released under the GNU General Public License .\" -*- nroff -*- .TH APPROX-IMPORT 8 "June 2010" .\" Please adjust this date whenever revising the manpage. .SH NAME approx-import \- copy local .deb files into the approx cache .SH SYNOPSIS .PP .B approx-import [\fIOPTION\fP]... file.deb \&... .SH DESCRIPTION .PP .B approx-import copies local .deb files, such as those found in .IR /var/cache/apt/archives , into the .BR approx (8) cache. .PP It does so by computing the MD5 checksum of each argument. If it matches a package in one of the .I Packages files in the .BR approx (8) cache, then the file is copied to the appropriate location in the cache. .PP Note that all .I Packages files must be scanned, regardless of how many .deb files are being imported, and this process may take several minutes. .PP The .B approx-import command must be run by the superuser or the .BR approx (8) user. .SH OPTIONS .TP .BR \-c " file, " \-\^\-config " file" Specify an additional configuration file. May be used multiple times. .TP .BR \-s ", " \-\^\-simulate Process files but do not actually copy them into the cache. .TP .BR \-q ", " \-\^\-quiet Don't print the names of files that are imported. .TP .BR \-v ", " \-\^\-verbose Print the disposition of each file. .SH EXAMPLES .PP To import packages from the local .BR apt (8) cache: .IP approx-import /var/cache/apt/archives/*.deb .PP To import packages from another proxy's cache: .IP find other-cache \-name "*.deb" | xargs approx-import .PP (where .I other-cache is the pathname of the other proxy's cache directory) .SH FILES .TP .I /etc/approx/approx.conf .br Configuration file for .B approx and related programs. .TP .I /var/cache/approx .br Default cache directory for archive files. .SH SEE ALSO .IR approx.conf (5), .BR approx (8) .SH AUTHOR Eric Cooper approx-5.4/doc/approx.8000066400000000000000000000052111225122005200150300ustar00rootroot00000000000000.\" approx: proxy server for Debian archive files .\" Copyright (C) 2011 Eric C. Cooper .\" Released under the GNU General Public License .\" -*- nroff -*- .TH APPROX 8 "May 2011" .\" Please adjust this date whenever revising the manpage. .SH NAME approx \- proxy server for Debian archive files .SH SYNOPSIS .PP .B approx [\fIOPTION\fP]... .SH DESCRIPTION .B approx responds to HTTP requests made by .BR apt\-get (8). It maintains a cache of Debian archive files that have been previously downloaded, so that it can respond with a local copy when possible. If a file not in the cache is requested, .B approx will download it from a remote Debian repository and deliver the contents to the client, simultaneously caching it for future use. Over time, the .B approx server cache will grow to contain multiple, unneeded versions of Debian packages. The .BR approx-gc (8) program removes these from the cache. .SH OPTIONS .TP .BR \-c " file, " \-\^\-config " file" Specify an additional configuration file. May be used multiple times. .SH USAGE .PP .B approx is invoked by .BR inetd (8). .SH EXAMPLES .PP Suppose that a client machine's .I /etc/apt/sources.list file contains the following lines: .IP deb http://apt:9999/debian testing main .br deb http://apt:9999/security testing/updates main .br deb-src http://apt:9999/debian unstable main .PP In this example, .I apt is the hostname of the .B approx server machine on the local network. Each distribution, such as "debian" or "security", is mapped to a remote repository in the .B approx server's configuration file. .PP For example, the .I approx.conf file on the .B approx server might contain the lines .IP debian http://ftp.debian.org/debian .br security http://security.debian.org .PP The mapping scheme is very simple. If the .I approx.conf file contains the line .IP repository http://remote-host/initial/path .PP then any request to the .B approx server of the form .IP http://approx-server/repository/rest/of/URL .PP is rewritten to .IP http://remote-host/initial/path/rest/of/URL .PP when there is a "cache miss", and that file is cached as .IP /var/cache/approx/repository/rest/of/URL .PP (Note that the repository name on the left-hand side is not included in the rewritten URL unless it is explicitly mentioned in the right-hand side's initial path.) .SH FILES .TP .I /etc/approx/approx.conf .br Configuration file for .B approx and related programs. .TP .I /var/cache/approx .br Default cache directory for archive files. .SH SEE ALSO .IR approx.conf (5), .BR inetd (8), .BR approx-import (8), .BR approx-gc (8), .BR apt-get (8), .IR sources.list (5) .SH AUTHOR Eric Cooper approx-5.4/doc/approx.conf.5000066400000000000000000000054751225122005200157650ustar00rootroot00000000000000.\" approx: proxy server for Debian archive files .\" Copyright (C) 2012 Eric C. Cooper .\" Released under the GNU General Public License .\" -*- nroff -*- .TH APPROX.CONF 5 "Apr 2012" .\" Please adjust this date when revising the manpage. .SH NAME approx.conf \- configuration file for approx proxy server .SH SYNOPSIS .PP /etc/approx/approx.conf .SH DESCRIPTION .PP Each non-blank line of the configuration file should contain a name/value pair, separated by white space. Comments start with a "#" character and continue to the end of the line. .PP Names that begin with the "$" character are reserved for use as configuration parameters. The following parameters are currently defined: .IP $cache Specifies the location of the approx cache directory (default: .IR /var/cache/approx ). It and all its subdirectories must be owned by the approx server (see also the $user and $group parameters, below.) .IP $interval Specifies the time in minutes after which a cached file will be considered too old to deliver without first checking with the remote repository for a newer version (default: 60) .IP $max_rate Specifies the maximum download rate from remote repositories, in bytes per second (default: unlimited). The value may be suffixed with "K", "M", or "G" to indicate kilobytes, megabytes, or gigabytes per second, respectively. .IP $max_redirects Specifies the maximum number of HTTP redirections that will be followed when downloading a remote file (default: 5) .IP "$user, $group" Specifies the user and group that owns the files in the approx cache (default: approx) .IP $syslog Specifies the .BR syslog (3) facility to use when logging (default: daemon) .IP $pdiffs Specifies whether to support IndexFile diffs (default: .BR true ) .IP $offline Specifies whether to deliver (possibly out-of-date) cached files when they cannot be downloaded from remote repositories (default: .BR false ) .IP $max_wait Specifies how many seconds an .BR approx (8) process will wait for a concurrent download of a file to complete, before attempting to download the file itself (default: 10) .IP $verbose Specifies whether informational messages should be printed in the log (default: .BR false ) .IP $debug Specifies whether debugging messages should be printed in the log (default: .BR false ) .PP The other name/value pairs are used to map distribution names to remote repositories. For example, .IP debian http://ftp.debian.org/debian .br security http://security.debian.org .SH TCP PORT NUMBER .PP The port on which .BR approx (8) listens is not specified in this file, but in .IR /etc/inetd.conf . The default value is 9999, for compatibility with .BR apt\-proxy (8), but it may be changed by running the command .IP dpkg-reconfigure approx .SH SEE ALSO .BR approx (8), .BR approx-gc (8), .BR inetd (8) .SH AUTHOR Eric Cooper approx-5.4/doc/approx.xinetd000066400000000000000000000001411225122005200161510ustar00rootroot00000000000000service approx { socket_type = stream wait = no user = approx server = /usr/sbin/approx } approx-5.4/etc/000077500000000000000000000000001225122005200134355ustar00rootroot00000000000000approx-5.4/etc/approx.conf000066400000000000000000000010711225122005200156140ustar00rootroot00000000000000# Here are some examples of remote repository mappings. # See http://www.debian.org/mirror/list for mirror sites. #debian http://ftp.debian.org/debian #security http://security.debian.org/debian-security # The following are the default parameter values, so there is # no need to uncomment them unless you want a different value. # See approx.conf(5) for details. #$cache /var/cache/approx #$interval 60 #$max_rate unlimited #$max_redirects 5 #$user approx #$group approx #$syslog daemon #$pdiffs true #$offline false #$max_wait 10 #$verbose false #$debug false approx-5.4/etc/approx.socket000066400000000000000000000002471225122005200161630ustar00rootroot00000000000000[Unit] Description=caching proxy server for Debian archive files Documentation=man:approx(8) [Socket] ListenStream=9999 Accept=yes [Install] WantedBy=sockets.target approx-5.4/etc/approx@.service000066400000000000000000000002451225122005200164310ustar00rootroot00000000000000[Unit] Description=caching proxy server for Debian archive files Documentation=man:approx(8) [Service] User=approx ExecStart=-/usr/sbin/approx StandardInput=socket approx-5.4/gc_cache.ml000066400000000000000000000200611225122005200147270ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2013 Eric C. Cooper Released under the GNU General Public License *) (* Garbage-collect the approx cache using a mark-sweep algorithm *) open Config open Program open Release open Util let usage () = print "Usage: approx-gc [options] Garbage-collect the approx cache Options: -f|--fast do not validate checksums -k|--keep|-s|--simulate do not remove files -q|--quiet do not print file names -v|--verbose print reason for removal"; exit 1 let no_checksum = ref false let simulate = ref false let quiet = ref false let verbose = ref false let () = List.iter (function | "-f" | "--fast" -> no_checksum := true | "-k" | "--keep" | "-s" | "--simulate" -> simulate := true | "-q" | "--quiet" -> quiet := true | "-v" | "--verbose" -> verbose := true | _ -> usage ()) arguments let no_checksum = !no_checksum let simulate = !simulate let quiet = !quiet let verbose = !verbose (* The cache is probably only a small subset of all the files in the Debian archive, so we start with a table of filenames actually present in this cache, then check their validity as we process the Packages and Sources files *) let file_table = Hashtbl.create 4096 let get_status = Hashtbl.find file_table let set_status = Hashtbl.replace file_table let iter_status proc = Hashtbl.iter proc file_table (* The known distributions are the first-level directories in the cache *) let distributions = List.filter (fun f -> Sys.is_directory (cache_dir ^/ f)) (Array.to_list (Sys.readdir cache_dir)) (* Check if a file is part of a known distribution *) let dist_is_known file = try List.mem (fst (split_cache_path file)) distributions with Invalid_argument _ -> false (* Check if a Release file is no more than 5 minutes older than an InRelease file in the same directory, or vice versa *) let is_current_release file = let current_with other = let dir = Filename.dirname file in let file' = dir ^/ other in not (Sys.file_exists file') || is_cached_nak file' || file_modtime file' -. file_modtime file < 300. in not (is_cached_nak file) && match Filename.basename file with | "Release" -> current_with "InRelease" | "InRelease" -> current_with "Release" | "Release.gpg" -> true | _ -> false (* Scan the cache and add candidates for garbage collection to the status table. If a file is not in this table, it will not be removed. Packages and Sources files are collected and returned in the list of roots for the marking phase, but are not added to the table themselves. DiffIndex files are also returned in the list of roots, so that pdiff files will be marked, and similarly for TranslationIndex files. Since Release files are unreachable from the roots and would otherwise be removed, they are added to the table only if there is a newer version. *) let scan_files () = let scan roots file = let add () = set_status file None; roots in let skip_root () = file :: roots in let skip () = roots in if not (dist_is_known file) then add () else if is_index file || is_diff_index file || is_i18n_index file then skip_root () else if is_current_release file then skip () else add () in fold_non_dirs scan [] cache_dir (* Handle the case of filename fields of the form ./path *) let canonical path = if String.length path >= 2 && path.[0] = '.' && path.[1] = '/' then substring path ~from: 2 else path (* If a file is present in the status table, mark it with the result of checking its size and checksum against the given information *) let mark_generic pf vf checksum (info, file) = let path = pf (canonical file) in try match get_status path with | None -> if is_cached_nak path then begin if minutes_old (file_ctime path) <= interval then (* keep it since it's reachable and current *) set_status path (Some Control_file.Valid) end else let status = vf path (Control_file.validate ?checksum info) in set_status path (Some status) | Some _ -> (* already marked *) () with Not_found -> () let mark_file prefix = mark_generic ((^/) prefix) (fun f k -> k f) let mark_package prefix fields = let filename = Control_file.lookup "filename" fields in let size = Int64.of_string (Control_file.lookup "size" fields) in let sum, func = Control_file.get_checksum fields in let checksum = if no_checksum then None else Some func in mark_file prefix checksum ((sum, size), filename) let source_directory prefix fields = match try Control_file.lookup "directory" fields with Control_file.Missing _ -> "." with | "." -> prefix | dir -> prefix ^/ dir let mark_source prefix fields = let dir = source_directory prefix fields in let info = Control_file.lookup_info "files" fields in let checksum = if no_checksum then None else Some file_md5sum in List.iter (mark_file dir checksum) info (* Like mark_file, but deals with the complication that the DiffIndex file refers only to uncompressed pdiffs *) let mark_pdiff prefix = mark_generic (fun f -> prefix ^/ f ^ ".gz") with_decompressed let mark_diff_index prefix index = let items = Control_file.read index in let pdiffs = Control_file.lookup_info "sha1-patches" items in let checksum = if no_checksum then None else Some file_sha1sum in List.iter (mark_pdiff prefix checksum) pdiffs let mark_i18n_index prefix index = let items = Control_file.read index in let translations = Control_file.lookup_info "sha1" items in let checksum = if no_checksum then None else Some file_sha1sum in List.iter (mark_file prefix checksum) translations let mark_index index = if verbose then print "[ %s ]" (shorten index); if is_index index then let dist, _ = split_cache_path index in let prefix = cache_dir ^/ dist in if is_packages_file index then Control_file.iter (mark_package prefix) index else if is_sources_file index then Control_file.iter (mark_source prefix) index else file_message index "not a Packages or Sources file" else if is_diff_index index then let prefix = Filename.dirname index in mark_diff_index prefix index else if is_i18n_index index then let prefix = Filename.dirname index in mark_i18n_index prefix index else file_message index "unexpected index file" let mark () = let roots = scan_files () in let mark_root r = if not (is_cached_nak r) then mark_index r in List.iter mark_root roots let status_suffix = function | None -> "" | Some (Control_file.Wrong_size _) -> ": incorrect size" | Some (Control_file.Wrong_checksum _) -> ": incorrect checksum" | Some Control_file.Valid -> assert false let print_gc file status = if not quiet then print "%s%s" (shorten file) (if verbose then status_suffix status else "") let inactive file = Unix.time () -. file_modtime file > 300. (* 5 minutes *) let sweep () = let gc file = function | Some Control_file.Valid -> () | status -> if inactive file then (print_gc file status; if not simulate then perform Sys.remove file) else if verbose then file_message file "not old enough to remove" in iter_status gc let empty_dirs = let collect_empty list dir = try if Sys.readdir dir = [||] then dir :: list else list with e -> print "%s" (string_of_exception e); list in fold_dirs collect_empty [] let remove_dir dir = if not quiet then print "%s%s" (shorten dir) (if verbose then ": empty directory" else "/"); (* any exception raised by rmdir will terminate the pruning loop *) if not simulate then perform Unix.rmdir dir let rec prune () = match empty_dirs cache_dir with | [] -> () | [dir] when dir = cache_dir -> () (* don't remove cache dir *) | list -> List.iter remove_dir list; if not simulate then prune () let garbage_collect () = if not simulate then drop_privileges ~user ~group; mark (); sweep (); prune () let () = main_program garbage_collect () approx-5.4/import.ml000066400000000000000000000123221225122005200145260ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2011 Eric C. Cooper Released under the GNU General Public License *) (* Import local files into the approx cache *) open Config open Program open Util let usage () = print "Usage: approx-import [options] file ... Import local files into the approx cache Options: -s|--simulate scan but do not actually import any files -q|--quiet do not print the file names that are imported -v|--verbose print information about each file"; exit 1 let simulate = ref false let quiet = ref false let verbose = ref false let files = ref [] let () = List.iter (function | "-k" | "--keep" | "-s" | "--simulate" -> simulate := true | "-q" | "--quiet" -> quiet := true | "-v" | "--verbose" -> verbose := true | arg -> if arg.[0] = '-' then usage () else files := arg :: !files) arguments let simulate = !simulate let quiet = !quiet let verbose = !verbose let files = if !files <> [] then List.rev !files else usage () (* Import status of an individual file *) type import_status = | Not_seen | Exists of string | Imported of string let imported = function | Imported _ -> true | _ -> false let string_of_import_status = function | Not_seen -> "not referenced by any Packages file" | Exists loc -> "already cached as " ^ loc | Imported loc -> "imported to " ^ loc (* Information about a package that can be extracted from its filename, size, and md5sum *) type package = { name : string; epoch : string; version : string; arch : string; size : int64; file : string; base : string; md5sum : string; mutable status : import_status } (* Regular expression for matching package filenames *) let file_re = Pcre.regexp "^([^_]+)_(?:(\\d+)%3a)?([^_]+)_(.+)\\.deb$" let package_of_file file = let base = Filename.basename file in match Pcre.extract ~rex: file_re ~full_match: false base with | [| name; epoch; version; arch |] -> { name = name; epoch = epoch; version = version; arch = arch; size = file_size file; file = file; base = base; md5sum = file_md5sum file; status = Not_seen } | _ -> raise Not_found let without_epoch version = try substring ~from: (String.index version ':' + 1) version with Not_found -> version let packages = Hashtbl.create (List.length files) let add_package pkg = (try let q = Hashtbl.find packages pkg.md5sum in if pkg.name <> q.name then print "%s: MD5 collision with %s" pkg.base q.base with Not_found -> ()); Hashtbl.replace packages pkg.md5sum pkg let scan_files () = let add_file name = if Sys.file_exists name then begin try add_package (package_of_file name) with Not_found -> if verbose then print "%s: ignored" (Filename.basename name) end else print "%s: not found" name in let n = List.length files in if n > 1 && verbose then print "[ scanning %d files ]" n; List.iter add_file files; if Hashtbl.length packages = 0 then begin if not quiet then print "%s" "no .deb files specified"; exit 1 end let import_package pkg dst = let target = cache_dir ^/ dst in if Sys.file_exists target then pkg.status <- Exists dst else begin pkg.status <- Imported dst; if not simulate then begin make_directory (Filename.dirname target); ignore (Sys.command (Printf.sprintf "cp -p %s %s" pkg.file target)) end end let maybe_import pkg fields dist = let mismatch kind = if verbose then print "%s: %s mismatch (should be %s)" pkg.base kind (Control_file.lookup kind fields) in if not (imported pkg.status) then if pkg.version = without_epoch (Control_file.lookup "version" fields) then if pkg.arch = Control_file.lookup "architecture" fields then if pkg.size = Int64.of_string (Control_file.lookup "size" fields) then import_package pkg (dist ^/ Control_file.lookup "filename" fields) else mismatch "size" else mismatch "architecture" else mismatch "version" let index_seen = ref false let import_files index = if Release.is_packages_file index then let dist, path = split_cache_path index in let check_package fields = try let md5sum = Control_file.lookup "md5sum" fields in maybe_import (Hashtbl.find packages md5sum) fields dist with Not_found -> () in index_seen := true; if verbose then print "[ %s/%s ]" dist path; Control_file.iter check_package index let print_package { base = base; status = status } = if verbose || imported status then print "%s: %s" base (string_of_import_status status) let print_status () = let pkgs = Hashtbl.fold (fun _ pkg list -> pkg :: list) packages [] in let cmp p q = String.compare p.base q.base in List.iter print_package (List.sort cmp pkgs) let import () = if not simulate then drop_privileges ~user ~group; scan_files (); iter_non_dirs import_files cache_dir; if not !index_seen then begin print "%s" "There are no Packages files in the approx cache.\n\ Please run \"apt-get update\" first."; exit 1 end; if not quiet then print_status () let () = main_program import () approx-5.4/log.ml000066400000000000000000000031071225122005200137760ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2013 Eric C. Cooper Released under the GNU General Public License *) open Printf let stderr_log _ msg = prerr_string msg; flush stderr let writer = ref stderr_log let log_to_stderr () = writer := stderr_log let facility_of_string s = match String.lowercase s with | "authpriv" -> `Authpriv | "cron" -> `Cron | "daemon" -> `Daemon | "ftp" -> `Ftp | "kern" -> `Kern | "local0" -> `Local0 | "local1" -> `Local1 | "local2" -> `Local2 | "local3" -> `Local3 | "local4" -> `Local4 | "local5" -> `Local5 | "local6" -> `Local6 | "local7" -> `Local7 | "lpr" -> `Lpr | "mail" -> `Mail | "news" -> `News | "syslog" -> `Syslog | "user" -> `User | "uucp" -> `Uucp | "default" -> `Default | _ -> Util.invalid_string_arg "syslog facility" s let log_to_syslog () = let facility = facility_of_string Config.syslog in let ident = sprintf "%s[%d]" (Filename.basename Sys.argv.(0)) (Unix.getpid ()) in Netsys_posix.openlog (Some ident) [] facility; writer := Netsys_posix.syslog facility let message enabled level = (* ensure message is newline-terminated, otherwise syslog-ng behaves differently than syslog *) let terminate str = let n = String.length str in if n = 0 || str.[n - 1] <> '\n' then str ^ "\n" else str in ksprintf (fun str -> if enabled then !writer level (terminate str)) let error_message fmt = message true `Err fmt let info_message fmt = message Config.verbose `Info fmt let debug_message fmt = message Config.debug `Debug fmt approx-5.4/log.mli000066400000000000000000000006341225122005200141510ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2011 Eric C. Cooper Released under the GNU General Public License *) val error_message : ('a, unit, string, unit) format4 -> 'a val info_message : ('a, unit, string, unit) format4 -> 'a val debug_message : ('a, unit, string, unit) format4 -> 'a val log_to_stderr : unit -> unit (* default *) val log_to_syslog : unit -> unit approx-5.4/myocamlbuild.ml000066400000000000000000000013371225122005200157010ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2013 Eric C. Cooper Released under the GNU General Public License *) open Ocamlbuild_plugin open Command open Pathname let libraries = ["pcre"; "sha"; "netsys_oothr"; "netsys"; "equeue"; "netstring"; "netcgi2:netcgi"; "nethttpd"] let split str = let i = String.index str ':' in String.sub str 0 i, String.sub str (i + 1) (String.length str - (i + 1)) let add_library lib = let inc, lib = if String.contains lib ':' then split lib else lib, lib in ocaml_lib ~extern: true ~dir: ("+" ^ inc) lib let custom_rules () = List.iter add_library libraries let () = dispatch (function After_rules -> custom_rules () | _ -> ()) approx-5.4/patch.ml000066400000000000000000000060051225122005200143140ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2008 Eric C. Cooper Released under the GNU General Public License *) open Util let get_line chan = try Some (input_line chan) with End_of_file -> None let output_line chan line = output_string chan line; output_char chan '\n' (* Apply a function to lines m through n *) let iter_lines proc m n chan = let rec loop i = if i <= n then match get_line chan with | Some line -> proc line; loop (i + 1) | None -> failwith ("EOF while scanning to line " ^ string_of_int n) in loop m (* Apply a function to all remaining lines *) let iter_eof proc chan = let rec loop () = match get_line chan with | Some line -> proc line; loop () | None -> () in loop () let copy_lines m n ic oc = iter_lines (output_line oc) m n ic let delete_lines = iter_lines ignore (* The following operators implement the corresponding ed commands and update the current input line number *) let append lines n ic oc cur = copy_lines cur n ic oc; List.iter (output_line oc) lines; n + 1 let change lines m n ic oc cur = copy_lines cur (m - 1) ic oc; delete_lines m n ic; List.iter (output_line oc) lines; n + 1 let delete = change [] let copy_tail ic oc cur = iter_eof (output_line oc) ic; 0 (* Collect lines until a terminating "." line is seen *) let get_lines chan = let rec loop lines = match get_line chan with | Some "." -> lines | Some line -> loop (line :: lines) | None -> failwith "EOF occurred before terminating \".\"" in List.rev (loop []) let range_of_string str = try let i = String.index str ',' in let start = int_of_string (substring str ~until: i) in let stop = int_of_string (substring str ~from: (i + 1)) in start, stop with Not_found -> let n = int_of_string str in n, n (* Ed commands are represented as operators on the input channel, output channel, and current line number. When applied, each operator returns the updated line number. *) type t = in_channel -> out_channel -> int -> int (* Translate an ed command into an operator. Additional lines are consumed from the channel in the case of append and change commands. *) let parse_line chan line = let last = String.length line - 1 in try let (m, n) = range_of_string (String.sub line 0 last) in match line.[last] with | 'a' -> assert (m = n); append (get_lines chan) m | 'c' -> change (get_lines chan) m n | 'd' -> delete m n | _ -> raise Exit with _ -> failwith ("malformed ed command: " ^ line) (* Parse an input channel containing ed commands. "diff --ed" produces commands in decreasing line-number order; this function effectively reverses that order as it composes the operators. *) let parse chan = let compose f g ic oc cur = g ic oc (f ic oc cur) in let rec loop op = match get_line chan with | Some line -> loop (compose (parse_line chan line) op) | None -> op in loop copy_tail let apply cmds ic oc = ignore (cmds ic oc 1) approx-5.4/patch.mli000066400000000000000000000012141225122005200144620ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2007 Eric C. Cooper Released under the GNU General Public License *) (* Abstract type representing a sequence of ed commands (append, change, or delete) *) (* Note that "diff --ed" also produces substitute commands of the form "s/.././" in the case where a "." text line is emitted as ".." and then modified. These are not handled here since valid Debian control files cannot contain "." lines. *) type t (* Parse a stream of ed commands *) val parse : in_channel -> t (* Apply a patch sequence *) val apply : t -> in_channel -> out_channel -> unit approx-5.4/pdiff.ml000066400000000000000000000070501225122005200143060ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2013 Eric C. Cooper Released under the GNU General Public License *) open Config open Log open Util let index_file path = Filename.chop_suffix (Filename.dirname path) ".diff" ^ ".bz2" let read_diff_index dir = let diff_index = dir ^/ "Index" in if not (Sys.file_exists diff_index) then failwith (diff_index ^ " does not exist"); let items = Control_file.read diff_index in let current = Control_file.lookup "sha1-current" items ^ " current" in let current_info = match Control_file.info_list current with | [info, "current"] -> info | _ -> failwith ("unexpected SHA1-Current entry: " ^ current) in let combine (index_info, name) (patch_info, name') = if name <> name' then failwith (diff_index ^ " is inconsistent"); (index_info, dir ^/ name, patch_info) in let history = Control_file.lookup_info "sha1-history" items in let patches = Control_file.lookup_info "sha1-patches" items in List.map2 combine history patches, current_info let rec find_tail p = function | x :: rest as list -> if p x then list else find_tail p rest | [] -> [] (* Pdiff application must result in a Packages or Sources file that is identical to the one in the official archive, so this function must use the same bzip2 parameters that dak does. See http://ftp-master.debian.org/git/dak.git *) let compress ~src ~dst = let cmd = Printf.sprintf "/usr/bin/nice /usr/bin/ionice -c3 /bin/bzip2 -9 < %s > %s" src dst in debug_message "Compressing: %s" cmd; if Sys.command cmd <> 0 then failwith "compress"; if debug && not (Release.valid dst) then debug_message "Compressed file %s is invalid" dst (* Apply a parsed pdiff to the given file *) let apply_patch cmds file = let file' = with_in_channel open_in file (fun chan -> with_temp_file file (Patch.apply cmds chan)) in Sys.rename file' file let valid_file info = Control_file.valid file_sha1sum info let apply_pdiffs file pdiffs final index = let patch (index_info, name, pdiff_info) = let pdiff = name ^ ".gz" in let valid_index = valid_file index_info in let valid_pdiff = valid_file pdiff_info in let check_and_apply pdiff' = if valid_pdiff pdiff' then begin debug_message "Applying %s" pdiff; let cmds = with_in_channel open_in pdiff' Patch.parse in if valid_index file then apply_patch cmds file else (debug_message "Invalid index %s" file; raise Exit) end else (debug_message "Invalid pdiff %s" pdiff; raise Exit) in if not (Sys.file_exists pdiff) then Url.download_file pdiff; with_decompressed pdiff check_and_apply in try List.iter patch pdiffs; if valid_file final file then begin info_message "Updated %s" index; compress ~src: file ~dst: index end else error_message "Invalid update of %s" index with Exit -> () let update index = info_message "Updating %s" index; if not (Filename.check_suffix index ".bz2") then invalid_string_arg "Pdiff.update" index; if not (Sys.file_exists index) then Url.download_file index; let dir = Filename.chop_suffix index ".bz2" ^ ".diff" in let diffs, final = read_diff_index dir in let update_index file = let info = (file_sha1sum file, file_size file) in if info = final then debug_message "%s is current" index else match find_tail (fun (i, _, _) -> i = info) diffs with | [] -> failwith (index ^ " not found in DiffIndex") | list -> apply_pdiffs file list final index in decompress_and_apply update_index index approx-5.4/pdiff.mli000066400000000000000000000006231225122005200144560ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2011 Eric C. Cooper Released under the GNU General Public License *) (* Return the filename of the Packages or Sources file corresponding to a pdiff or a DiffIndex file *) val index_file : string -> string (* Update the given Packages or Sources file by applying any needed pdiffs *) val update : string -> unit approx-5.4/program.ml000066400000000000000000000025351225122005200146700ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2011 Eric C. Cooper Released under the GNU General Public License *) open Printf open Util open Log let string_of_uerror = function | err, str, "" -> sprintf "%s: %s" str (Unix.error_message err) | err, str, arg -> sprintf "%s: %s (%s)" str (Unix.error_message err) arg let string_of_exception exc = match exc with | Failure str -> "Failure: " ^ str | Invalid_argument str -> "Invalid argument: " ^ str | Sys_error str -> str | Unix.Unix_error (err, str, arg)-> string_of_uerror (err, str, arg) | Control_file.Missing (par, field) -> sprintf "File %s, line %d: missing \"%s\" field" (Control_file.file_name par) (Control_file.line_number par) (String.capitalize field) | e -> Printexc.to_string e let perform f x = try f x with e -> error_message "%s" (string_of_exception e) let backtrace () = let bt = Printexc.get_backtrace () in if bt <> "" then let lines = split_lines bt in error_message "%s" "Uncaught exception"; List.iter (fun s -> if s <> "" then error_message " %s" s) lines let main_program f x = try f x with e -> backtrace (); error_message "%s" (string_of_exception e); exit 1 let print fmt = error_message fmt let file_message file msg = print "%s: %s" (Config.shorten file) msg approx-5.4/program.mli000066400000000000000000000013701225122005200150350ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2011 Eric C. Cooper Released under the GNU General Public License *) (* Return a descriptive message for an exception *) val string_of_exception : exn -> string (* Print a backtrace for an uncaught exception *) val backtrace : unit -> unit (* Call a procedure and print, but otherwise ignore, any exception *) val perform : ('a -> unit) -> 'a -> unit (* Run the main function of a program and print any uncaught exception *) val main_program : ('a -> unit) -> 'a -> unit (* Print on stderr and append a newline *) val print : ('a, unit, string, unit) format4 -> 'a (* Print a filename followed by a message on stderr *) val file_message : string -> string -> unit approx-5.4/release.ml000066400000000000000000000044161225122005200146410ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2011 Eric C. Cooper Released under the GNU General Public License *) open Config open Log open Util (* Find the newest InRelease or Release file in the given directory or raise Not_found *) let newest dir = newest_file [dir ^/ "InRelease"; dir ^/ "Release"] (* Find the Release file for the given file or raise Not_found *) let find file = match explode_path file with | dist :: "dists" :: suite :: _ -> newest (dist ^/ "dists" ^/ suite) | _ -> raise Not_found let read file = let release = find file in release, Control_file.read_checksum_info release let validate (release, (info_list, checksum)) file = Sys.file_exists file && let rdir = Filename.dirname release in let rfile = if is_prefix rdir file then substring file ~from: (String.length rdir + 1) else invalid_string_arg "Release.validate" file in try let info = fst (List.find (fun (_, name) -> name = rfile) info_list) in Control_file.valid checksum info file with Not_found -> if Filename.dirname file <> rdir then debug_message "%s not found in %s" rfile (shorten release); false let valid file = if file.[0] = '/' then invalid_string_arg "Release.valid" file; try validate (read file) file with Not_found | Control_file.Missing _ -> false let is_variant variants file = List.mem (Filename.basename file) variants let is_packages_file = is_variant (compressed_versions "Packages") let is_sources_file = is_variant (compressed_versions "Sources") let is_index file = is_packages_file file || is_sources_file file let is_release file = match Filename.basename file with | "InRelease" | "Release" | "Release.gpg" -> true | _ -> false let diff_index_dir file = Filename.check_suffix (Filename.dirname file) ".diff" let is_diff_index file = Filename.basename file = "Index" && diff_index_dir file let is_pdiff file = Filename.basename file <> "Index" && diff_index_dir file let is_i18n_index file = Filename.basename file = "Index" && Filename.basename (Filename.dirname file) = "i18n" let immutable_suffixes = [".deb"; ".udeb"; ".dsc"; ".diff.gz"] @ compressed_versions ".tar" let immutable file = List.exists (Filename.check_suffix file) immutable_suffixes || is_pdiff file approx-5.4/release.mli000066400000000000000000000021361225122005200150070ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2011 Eric C. Cooper Released under the GNU General Public License *) (* Check if a file is valid according to the corresponding Release file. The file must be a pathname relative to the cache directory and the caller's working directory must be the cache directory. *) val valid : string -> bool (* Check if a file is a possibly-compressed Packages file *) val is_packages_file : string -> bool (* Check if a file is a possibly-compressed Sources file *) val is_sources_file : string -> bool (* Check if a file is an index (Packages, Sources, or a compressed version) *) val is_index : string -> bool (* Check if a file is an InRelease, Release, or Release.gpg file *) val is_release : string -> bool (* Check if a file is a DiffIndex *) val is_diff_index : string -> bool (* Check if a file is a pdiff *) val is_pdiff : string -> bool (* Check if a file is a TranslationIndex *) val is_i18n_index : string -> bool (* Check if a file is immutable (deb, source file, or pdiff) *) val immutable : string -> bool approx-5.4/tests/000077500000000000000000000000001225122005200140245ustar00rootroot00000000000000approx-5.4/tests/_tags000066400000000000000000000010461225122005200150450ustar00rootroot00000000000000# approx: proxy server for Debian archive files # Copyright (C) 2013 Eric C. Cooper # Released under the GNU General Public License : use_sha # Note: parent _tags file specifies # <**/*.{byte,native}>: use_unix, use_pcre, use_sha # so only additions and deletions to those are needed here : use_bigarray, use_netsys_oothr, use_netsys : use_bigarray, use_netsys_oothr, use_netsys : -use_pcre : -use_pcre approx-5.4/tests/config_test.ml000066400000000000000000000006041225122005200166620ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2007 Eric C. Cooper Released under the GNU General Public License *) open Printf let file = match Array.length Sys.argv with | 2 -> Sys.argv.(1) | _ -> eprintf "Usage: %s config-file\n" Sys.argv.(0); exit 1 let () = Config_file.read file; Config_file.iter (fun k v -> printf "%s: %s\n" k v) approx-5.4/tests/control_file_test.ml000066400000000000000000000020351225122005200200740ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2010 Eric C. Cooper Released under the GNU General Public License *) open Printf open Util let verbose = ref false let file = match Sys.argv with | [| _; file |] -> file | [| _; "-v"; file |] | [| _; "--verbose"; file |] -> verbose := true; file | _ -> eprintf "Usage: %s [-v] control-file\n" Sys.argv.(0); exit 1 let capitalize_parts str = join '-' (List.map String.capitalize (split '-' str)) let print_line = function | "" -> printf " .\n" | line -> printf " %s\n" line let print_pair (field, value) = printf "%s:" (capitalize_parts field); match split_lines value with | [] -> print_newline () | "" :: rest -> print_newline (); List.iter print_line rest | lines -> List.iter print_line lines let print_paragraph p = if !verbose then printf "[%d]\n" (Control_file.line_number p); Control_file.iter_fields print_pair p; print_newline () let () = Control_file.iter print_paragraph file approx-5.4/tests/dir_test.ml000066400000000000000000000013711225122005200161750ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2011 Eric C. Cooper Released under the GNU General Public License *) open Printf open Unix open Util let non_dirs, path = match Sys.argv with | [| _ |] -> false, "." | [| _; "-n" |] -> true, "." | [| _; dir |] -> false, dir | [| _; "-n"; dir |] -> true, dir | _ -> eprintf "Usage: %s [-n] [path]\n" Sys.argv.(0); exit 1 let foldf, metric = if non_dirs then fold_non_dirs, file_size else fold_dirs, fun f -> Int64.of_int (stat f).st_nlink let bigger (path, n as orig) path' = let n' = metric path' in print_endline path'; if n >= n' then orig else (path', n') let () = let biggest, n = foldf bigger ("", 0L) path in printf "\n%Ld\t%s\n" n biggest approx-5.4/tests/metadata_test.ml000066400000000000000000000020551225122005200171770ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2011 Eric C. Cooper Released under the GNU General Public License *) open Config open Program open Util let cache_relative path = if is_prefix cache_dir path then substring path ~from: (String.length cache_dir + 1) else path let check show_immutable path = let file = cache_relative path in let pr = file_message file in let pv msg = pr ((if Release.valid file then "valid" else "invalid") ^ " " ^ msg) in if not (Sys.file_exists file) then pr "not found" else if is_cached_nak file then pr "cached NAK" else if Release.immutable file then (if show_immutable then pr "immutable") else if Release.is_release file then pr "release" else if Release.is_index file then pv "index" else if Release.is_diff_index file then pv "diff_index" else if Release.is_i18n_index file then pv "i18n_index" else pr "unknown" let () = Sys.chdir cache_dir; if arguments = [] then iter_non_dirs (check false) cache_dir else List.iter (check true) arguments approx-5.4/tests/patch_test.ml000066400000000000000000000011541225122005200165150ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2008 Eric C. Cooper Released under the GNU General Public License *) open Printf open Util let diff_file, file_to_patch = match Array.length Sys.argv with | 2 -> Sys.argv.(1), None | 3 -> Sys.argv.(1), Some Sys.argv.(2) | _ -> eprintf "Usage: %s pdiff [file]\n" Sys.argv.(0); exit 1 let cmds = with_in_channel open_file diff_file Patch.parse let () = match file_to_patch with | Some file -> with_in_channel open_file file (fun chan -> Patch.apply cmds chan stdout) | None -> printf "Parsed %s\n" diff_file approx-5.4/tests/sha1_test.ml000066400000000000000000000010011225122005200162410ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2007 Eric C. Cooper Released under the GNU General Public License *) open Printf open Util let file = match Array.length Sys.argv with | 2 -> Sys.argv.(1) | _ -> eprintf "Usage: %s file\n" Sys.argv.(0); exit 1 let get_info chan = let size = LargeFile.in_channel_length chan in let checksum = Sha1.to_hex (Sha1.channel chan (-1)) in printf "%s %Ld\n" checksum size let () = with_in_channel open_file file get_info approx-5.4/url.ml000066400000000000000000000101661225122005200140220ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2012 Eric C. Cooper Released under the GNU General Public License *) open Config open Log open Util let string_of_time t = Netdate.format ~fmt: "%a, %d %b %Y %T GMT" (Netdate.create ~zone: 0 t) let time_of_string s = Netdate.parse_epoch ?zone: None s let translate_request url = let path = relative_url url in match explode_path path with | dist :: rest -> (try implode_path (Config_file.get dist :: rest), path with Not_found -> error_message "No remote repository for %s" dist; raise Not_found) | [] -> invalid_string_arg "translate_request" url let reverse_translate url = let longest_match k v r = if k.[0] <> '$' && is_prefix v url then match r with | Some (dist, repo) as orig -> if String.length v > String.length repo then Some (k, v) else orig | None -> Some (k, v) else r in match Config_file.fold longest_match None with | Some (dist, repo) -> dist ^/ substring url ~from: (String.length repo + 1) | None -> raise Not_found type protocol = HTTP | HTTPS | FTP | FILE let protocol url = try match String.lowercase (substring url ~until: (String.index url ':')) with | "http" -> HTTP | "https" -> HTTPS | "ftp" -> FTP | "file" -> FILE | proto -> invalid_string_arg "unsupported URL protocol" proto with Not_found -> invalid_string_arg "no protocol in URL" url let rate_option = match String.lowercase max_rate with | "" | "none" | "unlimited" -> "" | str -> "--limit-rate " ^ str let curl_command options url = Printf.sprintf "/usr/bin/curl --fail --silent --header \"Pragma: no-cache\" %s %s %s" rate_option (String.concat " " options) (quoted_string url) let head_command = curl_command ["--head"] let iter_headers proc chan = let next () = try Some (input_line chan) with End_of_file -> None in let rec loop () = match next () with | Some header -> let n = String.length header in if n > 0 && header.[n - 1] = '\r' then if n > 1 then begin proc (String.sub header 0 (n - 1)); loop () end else () (* CRLF terminates headers *) else error_message "Unexpected header: %s" header | None -> () in loop () let head url callback = let cmd = head_command url in debug_message "Command: %s" cmd; with_process cmd ~error: url (iter_headers callback) let download_command headers header_callback = let hdr_opts = List.map (fun h -> "--header " ^ quoted_string h) headers in let options = match header_callback with | Some _ -> "--include" :: hdr_opts | None -> hdr_opts in curl_command options let iter_body proc chan = let len = 4096 in let buf = String.create len in let rec loop () = match input chan buf 0 len with | 0 -> () | n -> proc buf 0 n; loop () in loop () let seq f g x = (f x; g x) let download url ?(headers=[]) ?header_callback callback = let cmd = download_command headers header_callback url in debug_message "Command: %s" cmd; with_process cmd ~error: url (match header_callback with | Some proc -> seq (iter_headers proc) (iter_body callback) | None -> iter_body callback) (* Find the remote URL corresponding to a given relative pathname in the cache, or raise Not_found if it does not correspond to a known mapping *) let translate_file file = match explode_path file with | dist :: path -> Config_file.get dist ^/ implode_path path | _ -> invalid_string_arg "translate_file" file let download_file file = let file' = gensym file in let options = ["--output"; file'; "--remote-time"; "--location"; "--max-redirs"; string_of_int max_redirects] @ (if Sys.file_exists file then ["--time-cond"; quoted_string (string_of_time (file_modtime file))] else []) in let cmd = curl_command options (translate_file file) in debug_message "Command: %s" cmd; if Sys.command cmd = 0 then (* file' may not exist if file was not modified *) try Sys.rename file' file with _ -> () else begin rm file'; failwith ("cannot download " ^ file) end approx-5.4/url.mli000066400000000000000000000022001225122005200141610ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2011 Eric C. Cooper Released under the GNU General Public License *) (* Translate a request URL to the remote repository URL and return it together with the relative path for the cache *) val translate_request : string -> string * string (* Translate a remote URL back to a relative path for the cache *) val reverse_translate : string -> string type protocol = HTTP | HTTPS | FTP | FILE val protocol : string -> protocol (* Perform HTTP HEAD (or equivalent for FTP and FILE) on the given URL and apply a callback to each header that is returned *) val head : string -> (string -> unit) -> unit (* Download the specified URL with optional request headers, then apply callbacks to the headers and body chunks *) val download : string -> ?headers:string list -> ?header_callback:(string -> unit) -> (string -> int -> int -> unit) -> unit (* Download a file from a remote repository *) val download_file : string -> unit (* Format and parse HTTP-compliant times *) val time_of_string : string -> float val string_of_time : float -> string approx-5.4/util.ml000066400000000000000000000236101225122005200141730ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2013 Eric C. Cooper Released under the GNU General Public License *) open Printf open Unix open Unix.LargeFile let invalid_string_arg msg arg = invalid_arg (msg ^ ": " ^ arg) let is_prefix pre str = let prefix_len = String.length pre in let string_len = String.length str in let rec loop i = if i = prefix_len then true else if i = string_len || pre.[i] <> str.[i] then false else loop (i + 1) in loop 0 let substring ?(from=0) ?until str = let n = String.length str in let until = match until with Some i -> i | None -> n in if from = 0 && until = n then str else String.sub str from (until - from) let split sep str = let next i = try Some (String.index_from str i sep) with Not_found -> None in let rec loop acc i = match next i with | Some j -> loop (substring str ~from: i ~until: j :: acc) (j + 1) | None -> substring str ~from: i :: acc in List.rev (loop [] 0) let join sep list = String.concat (String.make 1 sep) list let split_lines = split '\n' let explode_path = split '/' let implode_path = join '/' let (^/) = Filename.concat let make_directory path = (* Create a directory component in the path. Since it might be created concurrently, we have to ignore the Unix EEXIST error: simply testing for existence first introduces a race condition. *) let make_dir name = try mkdir name 0o755 with Unix_error (EEXIST, _, _) -> if not (Sys.is_directory name) then failwith ("file " ^ name ^ " is not a directory") in let rec loop cwd = function | dir :: rest -> let name = cwd ^/ dir in make_dir name; loop name rest | [] -> () in match explode_path path with | "" :: dirs -> loop "/" dirs | dirs -> loop "." dirs let quoted_string = sprintf "%S" (* Return the relative portion of a pathname *) let relative_path path = let n = String.length path in let rec loop i = if i = n then "." else if path.[i] <> '/' then String.sub path i (n - i) else loop (i + 1) in loop 0 let relative_url path = try if path.[0] = '/' then relative_path path else let i = String.index path ':' in if path.[i + 1] = '/' && path.[i + 2] = '/' then let j = String.index_from path (i + 3) '/' in relative_path (substring path ~from: j) else raise Exit with _ -> failwith ("malformed URL: " ^ path) (* Split a filename into the leading portion without an extension and the extension, if any, beginning with '.' *) let split_extension file = let base = Filename.basename file in (* look for '.' in basename only, not parent directories *) try let i = String.rindex base '.' in let dir = Filename.dirname file in let name = substring base ~until: i in let ext = substring base ~from: i in (if dir = "." then name else dir ^/ name), ext with Not_found -> (file, "") (* Return a filename with its extension, if any, removed *) let without_extension file = fst (split_extension file) let extension file = snd (split_extension file) let the = function Some x -> x | None -> raise Not_found (* private exception to wrap any exception raised during cleanup action *) exception Unwind of exn let unwind_protect body post = try let result = body () in try post (); result with e -> raise (Unwind e) with | Unwind e -> raise e (* assume cleanup has been done *) | e -> post (); raise e (* Apply a function to a resource that is acquired and released by the given functions *) let with_resource release acquire x f = let res = acquire x in unwind_protect (fun () -> f res) (fun () -> release res) let with_in_channel openf = with_resource close_in openf let with_out_channel openf = with_resource close_out openf let with_process ?error cmd = let close chan = if close_process_in chan <> WEXITED 0 then failwith (match error with | None -> cmd | Some msg -> msg) in with_resource close open_process_in cmd let gensym str = sprintf "%s.%d.%09.0f" (without_extension str) (getpid ()) (fst (modf (gettimeofday ())) *. 1e9) (* Use the default temporary directory unless it has been set to something inaccessible, in which case use "/tmp" *) let tmp_dir_name = ref None (* Return the name of the temporary file directory *) let tmp_dir () = match !tmp_dir_name with | Some dir -> dir | None -> let dir = try let dir = Filename.temp_dir_name in access dir [R_OK; W_OK; X_OK]; dir with Unix_error _ -> "/tmp" in tmp_dir_name := Some dir; dir let rm file = try Sys.remove file with _ -> () (* Decompression programs for supported compression formats *) let decompressors = [".bz2", "/bin/bzcat"; ".gz", "/bin/zcat"; ".xz", "/usr/bin/xzcat"] let compressed_extensions = List.map fst decompressors (* Check if a file is compressed by examining its extension *) let is_compressed file = List.mem (extension file) compressed_extensions (* Decompress a file to a temporary file, rather than reading from a pipe or using the CamlZip library, so that we detect corrupted files before partially processing them. This is also significantly faster than using CamlZip. Return the temporary file name, which must be removed by the caller *) let decompress file = let prog = try List.assoc (extension file) decompressors with Not_found -> invalid_string_arg "decompress" file in let tmp = (tmp_dir ()) ^/ gensym (Filename.basename file) in let cmd = sprintf "%s %s > %s" prog file tmp in if Sys.command cmd = 0 then tmp else (rm tmp; failwith ("decompress " ^ file)) let with_decompressed file = with_resource rm decompress file let decompress_and_apply f file = if is_compressed file then with_decompressed file f else f file (* Return a channel for reading a possibly compressed file. *) let open_file = decompress_and_apply open_in let compressed_versions name = if is_compressed name then invalid_string_arg "compressed_versions" name; name :: List.map (fun ext -> name ^ ext) compressed_extensions let stat_file file = try Some (stat file) with Unix_error _ -> None let is_cached_nak name = match stat_file name with | Some { st_size = 0L; st_perm = 0 } -> true | _ -> false let file_modtime file = (stat file).st_mtime let file_ctime file = (stat file).st_ctime let minutes_old t = int_of_float ((Unix.time () -. t) /. 60. +. 0.5) let newest_file list = let newest cur name = match stat_file name with | None | Some { st_size = 0L; st_perm = 0 } (* cached NAK *) -> cur | Some { st_mtime = modtime } -> begin match cur with | Some (f, t) -> if modtime > t then Some (name, modtime) else cur | None -> Some (name, modtime) end in match List.fold_left newest None list with | Some (f, _) -> f | None -> raise Not_found let open_out_excl file = out_channel_of_descr (openfile file [O_CREAT; O_WRONLY; O_EXCL] 0o644) let with_temp_file name proc = let file = gensym name in with_out_channel open_out_excl file proc; file let update_ctime name = match stat_file name with | Some { st_atime = atime; st_mtime = mtime } -> utimes name atime mtime | None -> () let directory_id name = match stat_file name with | Some { st_kind = S_DIR; st_dev = dev; st_ino = ino } -> Some (dev, ino) | _ -> None let fold_fs_tree non_dirs f init path = let rec walk uids_seen init path = let visit uids acc name = walk uids acc (path ^/ name) in let uid = directory_id path in if uid <> None then if List.mem uid uids_seen then (* cycle detected *) init else let uids_seen = uid :: uids_seen in let children = try Sys.readdir path with _ -> [||] in let init = if non_dirs then init else f init path in Array.fold_left (visit uids_seen) init children else if non_dirs && Sys.file_exists path then f init path else init in walk [] init path let fold_dirs f = fold_fs_tree false f let fold_non_dirs f = fold_fs_tree true f let iter_of_fold fold proc = fold (fun () -> proc) () let iter_dirs = iter_of_fold fold_dirs let iter_non_dirs = iter_of_fold fold_non_dirs let file_size file = (stat file).st_size module type MD = sig type t val file : string -> t val to_hex : t -> string end module FileDigest (MsgDigest : MD) = struct let sum file = MsgDigest.to_hex (MsgDigest.file file) end let file_md5sum = let module F = FileDigest(Digest) in F.sum let file_sha1sum = let module F = FileDigest(Sha1) in F.sum let file_sha256sum = let module F = FileDigest(Sha256) in F.sum let user_id = object method kind = "user" method get = getuid method set = setuid method lookup x = (getpwnam x).pw_uid end let group_id = object method kind = "group" method get = getgid method set = setgid method lookup x = (getgrnam x).gr_gid end let drop_privileges ~user ~group = let drop id name = try id#set (id#lookup name) with | Not_found -> failwith ("unknown " ^ id#kind ^ " " ^ name) | Unix_error (EPERM, _, _) -> failwith (Sys.argv.(0) ^ " must be run by root" ^ (if user <> "root" then " or by " ^ user else "")) in (* change group first, since we must still be privileged to change user *) drop group_id group; drop user_id user let check_id ~user ~group = let check id name = try if id#get () <> id#lookup name then failwith ("not running as " ^ id#kind ^ " " ^ name) with Not_found -> failwith ("unknown " ^ id#kind ^ " " ^ name) in check user_id user; check group_id group let string_of_sockaddr sockaddr ~with_port = match sockaddr with | ADDR_INET (host, port) -> let addr = string_of_inet_addr host in if with_port then sprintf "%s port %d" addr port else addr | ADDR_UNIX path -> failwith ("Unix domain socket " ^ path) approx-5.4/util.mli000066400000000000000000000124061225122005200143450ustar00rootroot00000000000000(* approx: proxy server for Debian archive files Copyright (C) 2013 Eric C. Cooper Released under the GNU General Public License *) val invalid_string_arg : string -> string -> 'a (* Check if the first string is a prefix of the second *) val is_prefix : string -> string -> bool (* Extract substring s.[from] .. s.[until-1] *) val substring : ?from:int -> ?until:int -> string -> string (* Split a string at each occurrence of a separator *) val split : char -> string -> string list (* Join a list of strings with a separator (inverse of split) *) val join : char -> string list -> string (* Split a string into lines *) val split_lines : string -> string list (* Split a pathname into a list of components. Initial and final "/" map to empty strings; "/" by itself maps to [""; ""] *) val explode_path : string -> string list (* Inverse of explode_path *) val implode_path : string list -> string (* Infix operator to concatenate two pathname components *) val (^/) : string -> string -> string (* Create a directory, including any intermediate directories along the specified path (like "mkdir --parents") *) val make_directory : string -> unit (* Return a quoted string *) val quoted_string : string -> string (* Return the relative portion of a URL *) val relative_url : string -> string (* Return the extension of a filename, including the initial '.' *) val extension : string -> string (* Return the underlying value of an option, otherwise raise Not_found *) val the : 'a option -> 'a (* Call a function making sure that a cleanup procedure is called before returning the result of the function or raising an exception *) val unwind_protect : (unit -> 'a) -> (unit -> unit) -> 'a (* Open an input channel and apply a function to the channel, using unwind_protect to ensure that the channel gets closed *) val with_in_channel : ('a -> in_channel) -> 'a -> (in_channel -> 'b) -> 'b (* Open an output channel and apply a function to the channel, using unwind_protect to ensure that the channel gets closed *) val with_out_channel : ('a -> out_channel) -> 'a -> (out_channel -> 'b) -> 'b (* Spawn a shell command and apply a function to its output, using unwind_protect to ensure that the channel gets closed *) val with_process : ?error:string -> string -> (in_channel -> 'a) -> 'a (* Generate a unique string, suitable for use as a filename *) val gensym : string -> string (* Attempt to remove a file but ignore any errors *) val rm : string -> unit (* Decompress a file and apply a function to the temporary file name, using unwind_protect to ensure that the temporary file gets removed *) val with_decompressed : string -> (string -> 'a) -> 'a (* Apply a function to a file or to a temporary decompressed version of it *) val decompress_and_apply : (string -> 'a) -> string -> 'a (* Return a list of possible compressed versions of the given file *) val compressed_versions : string -> string list (* Return the newest file in a list, or raise Not_found if none exist *) val newest_file : string list -> string (* Open a file for input, decompressing it if necessary *) val open_file : string -> in_channel (* Open a file for exclusive output *) val open_out_excl : string -> out_channel (* Open a temporary file for output in the same directory as the given one (so that it can be renamed back to the original), apply the given function, and return the file name *) val with_temp_file : string -> (out_channel -> unit) -> string (* Update the ctime of the given file, if it exists, without changing its access or modification times *) val update_ctime : string -> unit (* Create a generic iterator function from a fold function *) val iter_of_fold : ((unit -> 'a) -> unit -> 'b) -> 'a -> 'b (* Fold a function over each directory below a given path *) val fold_dirs : ('a -> string -> 'a) -> 'a -> string -> 'a (* Apply a function to each directory below a given path *) val iter_dirs : (string -> unit) -> string -> unit (* Fold a function over each non-directory below a given path *) val fold_non_dirs : ('a -> string -> 'a) -> 'a -> string -> 'a (* Apply a function to each non-directory below a given path *) val iter_non_dirs : (string -> unit) -> string -> unit (* Return the Unix stat information *) val stat_file : string -> Unix.LargeFile.stats option (* Check if a file is a cached "file not found" *) val is_cached_nak : string -> bool (* Return the modification time of a file *) val file_modtime : string -> float (* Return the status change time of a file *) val file_ctime : string -> float (* Calculate the age in minutes of a timestamp *) val minutes_old : float -> int (* Return the size of a file *) val file_size : string -> int64 (* Return the MD5 digest of a file *) val file_md5sum : string -> string (* Return the SHA1 digest of a file *) val file_sha1sum : string -> string (* Return the SHA256 digest of a file *) val file_sha256sum : string -> string (* Drop privileges to those of the given user and group *) val drop_privileges : user:string -> group:string -> unit (* Check that the program is executing as the given user and group *) val check_id : user:string -> group:string -> unit (* Convert a socket address to a string *) val string_of_sockaddr : Unix.sockaddr -> with_port:bool -> string