[Pkg-ocaml-maint-commits] r1308 - in trunk/projects/approx: . debian

Eric Cooper ecc-guest@costa.debian.org
Sun, 24 Apr 2005 21:10:57 +0000


Author: ecc-guest
Date: 2005-04-24 21:10:56 +0000 (Sun, 24 Apr 2005)
New Revision: 1308

Modified:
   trunk/projects/approx/approx.ml
   trunk/projects/approx/debian/approx.init
   trunk/projects/approx/debian/changelog
   trunk/projects/approx/debian/control
   trunk/projects/approx/gc_approx.ml
   trunk/projects/approx/package.ml
   trunk/projects/approx/util.ml
   trunk/projects/approx/util.mli
Log:
approx:
    simplified daemonization
    improve Unix error messages
    remove write_pid logic to allow daemon to run as unprivileged user
    moved common code to Util
debian/control
    improved description
    add dependency on wget
gc_approx:
    remove unused distributions in cache
    remove empty parent directories
    don't recheck files that have already been checked
    uncompress Packages.gz files to disk
    (improves performance and detects corrupt files)
    use wget to download corrupt Packages.gz files when detected


Modified: trunk/projects/approx/approx.ml
===================================================================
--- trunk/projects/approx/approx.ml	2005-04-24 17:29:27 UTC (rev 1307)
+++ trunk/projects/approx/approx.ml	2005-04-24 21:10:56 UTC (rev 1308)
@@ -302,14 +302,6 @@
     message "%s" (Printexc.to_string e);
     respond_not_found ~url: path chan
 
-(* Split absolute path into top-level directory and rest of path.
-   Example: split_path "/a/b/c" = ("a", "b/c") *)
-
-let split_path path =
-  match explode_path path with
-  | "" :: head :: tail -> head, implode_path tail
-  | _ -> failwith ("split_path: " ^ path)
-
 let serve_file path headers chan =
   try
     if debug then
@@ -331,19 +323,19 @@
   | _ -> respond_forbidden ~url: req#path chan
 
 let daemon () =
-  Unix.chdir cache;
   ignore (Unix.setsid ());
   List.iter Unix.close [Unix.stdin; Unix.stdout; Unix.stderr];
-  print_config ();
-  main (daemon_spec ~port ~callback ~mode: `Single ~timeout: None ())
+  try
+    Unix.chdir cache;
+    print_config ();
+    main (daemon_spec ~port ~callback ~mode: `Single ~timeout: None ())
+  with
+  | Unix.Unix_error (err, str, _) ->
+      message "%s: %s" str (Unix.error_message err)
+  | e ->
+      message "%s" (Printexc.to_string e)
 
-let write_pid_file pid =
-  let chan = open_out ("/var/run" ^/ prog ^ ".pid") in
-  fprintf chan "%d\n" pid;
-  close_out chan
-
 let () =
-  if Unix.fork () = 0 then
-    match Unix.fork () with
-    | 0 -> daemon ()
-    | pid -> write_pid_file pid
+  (* double fork to detach daemon *)
+  if Unix.fork () = 0 && Unix.fork () = 0 then
+    daemon ()

Modified: trunk/projects/approx/debian/approx.init
===================================================================
--- trunk/projects/approx/debian/approx.init	2005-04-24 17:29:27 UTC (rev 1307)
+++ trunk/projects/approx/debian/approx.init	2005-04-24 21:10:56 UTC (rev 1308)
@@ -9,7 +9,6 @@
 DESC="proxy server for Debian archive files"
 NAME=approx
 DAEMON=/usr/sbin/$NAME
-PIDFILE=/var/run/$NAME.pid
 SCRIPTNAME=/etc/init.d/$NAME
 
 # Gracefully exit if the package has been removed.
@@ -25,24 +24,21 @@
 #	Function that starts the daemon/service.
 #
 d_start() {
-	start-stop-daemon --start --quiet --pidfile $PIDFILE \
-		--exec $DAEMON
+	start-stop-daemon --start --quiet --exec $DAEMON
 }
 
 #
 #	Function that stops the daemon/service.
 #
 d_stop() {
-	start-stop-daemon --stop --quiet --pidfile $PIDFILE \
-		--name $NAME
+	start-stop-daemon --stop --quiet --name $NAME
 }
 
 #
 #	Function that sends a SIGHUP to the daemon/service.
 #
 d_reload() {
-	start-stop-daemon --stop --quiet --pidfile $PIDFILE \
-		--name $NAME --signal 1
+	start-stop-daemon --stop --quiet --name $NAME --signal 1
 }
 
 case "$1" in

Modified: trunk/projects/approx/debian/changelog
===================================================================
--- trunk/projects/approx/debian/changelog	2005-04-24 17:29:27 UTC (rev 1307)
+++ trunk/projects/approx/debian/changelog	2005-04-24 21:10:56 UTC (rev 1308)
@@ -1,3 +1,10 @@
+approx (1.12) unstable; urgency=low
+
+  * New description, from suggestions by
+    Raphaƫl Berbain <raphael.berbain@free.fr>
+    
+  --
+
 approx (1.11) unstable; urgency=low
 
   * Daemonize correctly (closes: #305102)

Modified: trunk/projects/approx/debian/control
===================================================================
--- trunk/projects/approx/debian/control	2005-04-24 17:29:27 UTC (rev 1307)
+++ trunk/projects/approx/debian/control	2005-04-24 21:10:56 UTC (rev 1308)
@@ -8,7 +8,25 @@
 
 Package: approx
 Architecture: any
-Depends: ${shlibs:Depends}
-Description: proxy server for Debian archive files
- Approx is an alternative to apt-proxy or apt-cacher.
- It is written in OCaml so it is fast and robust.
+Depends: ${shlibs:Depends}, wget
+Description: caching proxy server for Debian archive files
+ Approx is an HTTP-based Debian archive server.
+ It fetches packages from remote repositories on demand,
+ and caches them for local use.
+ .
+ Approx saves time and network bandwidth if you need to install or
+ upgrade Debian software for a number of machines on a local network.
+ Each package is downloaded from a remote site only once,
+ regardless of how many local clients install it.
+ Approx's cache typically uses only a few gigabytes of disk space.
+ .
+ Approx also simplifies the administration of client machines:
+ repository locations need only be changed in approx's configuration file,
+ not in every client's /etc/apt/sources.list file.
+ .
+ Approx can be used as a replacement for apt-proxy,
+ with no need to modify clients' /etc/apt/sources.list files,
+ or as an alternative to apt-cacher, with no need to run Apache.
+ .
+ Approx is intended to be robust, simple, and efficient.
+ It is written in OCaml (Objective Caml).

Modified: trunk/projects/approx/gc_approx.ml
===================================================================
--- trunk/projects/approx/gc_approx.ml	2005-04-24 17:29:27 UTC (rev 1307)
+++ trunk/projects/approx/gc_approx.ml	2005-04-24 21:10:56 UTC (rev 1308)
@@ -2,8 +2,15 @@
    Copyright (C) 2005  Eric C. Cooper <ecc@cmu.edu>
    Released under the GNU General Public License *)
 
+(* Garbage-collect the approx cache using a mark-sweep algorithm.
+   Any file in the cache whose name, size, and checksum match an entry
+   in a Packages file is assumed to be valid, and kept.
+   Anything else (other than Packages, Release, and Source files)
+   is assumed to be invalid, and removed. *)
+
 open Util
 open Default_config
+open Printf
 
 let usage () =
   prerr_endline "Usage: gc_approx [options]";
@@ -12,21 +19,27 @@
   prerr_endline "    -f|--fast     do not validate MD5 checksums";
   prerr_endline "    -k|--keep     do not remove files";
   prerr_endline "    -q|--quiet    do not print file names";
+  prerr_endline "    -v|--verbose  print reason for removal";
   exit 1
 
-let checksum = ref true
-let verbose = ref true
-let remove = ref true
+let no_checksum = ref false
+let keep = ref false
+let quiet = ref false
+let verbose = ref false
 
 let () =
   for i = 1 to Array.length Sys.argv - 1 do
     match Sys.argv.(i) with
-    | "-f" | "--fast" -> checksum := false
-    | "-k" | "--keep" -> remove := false
-    | "-q" | "--quiet" -> verbose := false
+    | "-f" | "--fast" -> no_checksum := true
+    | "-k" | "--keep" -> keep := true
+    | "-q" | "--quiet" -> quiet := true
+    | "-v" | "--verbose" -> verbose := true
     | _ -> usage ()
   done
 
+(* Recursively descend the filesystem tree starting at [path],
+   applying [proc] to each leaf (non-directory). *)
+
 let rec treewalk proc path =
   let visit name =
     let path = path ^/ name in
@@ -37,34 +50,62 @@
   in
   Array.iter visit (try Sys.readdir path with Sys_error _ -> [||])
 
-let packages = ref []
+let roots = ref []
 
 let find_roots () =
   let find file =
     match Filename.basename file with
-    | "Packages" | "Packages.gz" -> packages := file :: !packages
+    | "Packages" | "Packages.gz" -> roots := file :: !roots
     | _ -> ()
   in
   Config.iter (fun dir _ -> treewalk find (cache ^/ dir))
 
+(* Extract the distribution and relative filename
+   from the absolute pathname of a file in the cache.
+   Example:
+       dist_prefix "/var/cache/approx/debian/pool/main/..."
+   returns
+       ("debian", "pool/main/...") *)
+
+let split_cache_pathname path =
+   split_path (substring path ~from: (String.length cache))
+
+(* The cache is probably only a small subset of all the files in
+   the Debian archive, so  we start with a table of filenames
+   actually present in this cache, and mark them as we process
+   the Packages files. *)
+
+type file_status =
+  | Unmarked
+  | Valid
+  | Wrong_size
+  | Wrong_checksum
+
 let files = Hashtbl.create 4096
+let get_status = Hashtbl.find files
+let set_status = Hashtbl.replace files
+let iter_status proc = Hashtbl.iter proc files
 
-let record_files () =
-  let record file =
-    match Filename.basename file with
+(* Determine whether a file is a candidate for garbage collection. *)
+
+let is_candidate file =
+  match Filename.basename file with
     | "Packages" | "Packages.gz"
     | "Release" | "Release.gz"
     | "Sources" | "Sources.gz" ->
-	() (* never consider these for garbage collection *)
+	let dist, _ = split_cache_pathname file in
+	(try ignore (Config.get dist); false
+	 with Not_found -> true (* not part of a known distribution *))
     | _ ->
-	Hashtbl.replace files file false
+	true
+
+let record_files () =
+  let record file =
+    if is_candidate file then set_status file Unmarked
   in
   treewalk record cache
 
-let dist_prefix path =
-  let n = String.length cache in
-  try substring path ~until: (String.index_from path (n+1) '/')
-  with Not_found -> failwith ("unexpected pathname: " ^ path)
+(* Handle the case of filename fields of the form ./path  *)
 
 let canonical path =
   if String.length path >= 2 && path.[0] = '.' && path.[1] = '/' then
@@ -72,6 +113,9 @@
   else
     path
 
+(* We mark a file as live if its size and MD5 checksum
+   match those specified in a Packages file. *)
+
 let mark_file prefix fields =
   let file = canonical (List.assoc "filename" fields) in
   let size = int_of_string (List.assoc "size" fields) in
@@ -81,32 +125,83 @@
     (Unix.stat path).Unix.st_size = size
   in
   let check_md5sum () =
-    not !checksum || Digest.to_hex (Digest.file path) = md5sum
+    !no_checksum || Digest.to_hex (Digest.file path) = md5sum
   in
   try
-    if check_size () && check_md5sum () then
-      Hashtbl.replace files path true
+    if get_status path = Unmarked then
+      if check_size () then
+	if check_md5sum () then
+	  set_status path Valid
+	else
+	  set_status path Wrong_checksum
+      else
+	set_status path Wrong_size
   with
-    Unix.Unix_error (Unix.ENOENT, "stat", _) -> ()
+    Not_found -> ()
 
+let download dist file package =
+  let url = Config.get dist ^/ file in
+  let package' = package ^ ".tmp" in
+  let cmd = sprintf "/usr/bin/wget -q -O %s %s" package' url in
+  prerr_string "downloading "; prerr_endline url;
+  if Sys.command cmd = 0 then
+    Sys.rename package' package
+  else
+    failwith ("cannot download " ^ url)
+
 let mark_package package =
-  Package.iter (mark_file (dist_prefix package)) package
+  if !verbose then (print_string "# "; print_endline package);
+  let dist, file = split_cache_pathname package in
+  let prefix = cache ^/ dist in
+  try
+    Package.iter (mark_file prefix) package
+  with Failure "decompress" ->
+    (* corrupt Packages.gz file: download it and try again *)
+    download dist file package;
+    Package.iter (mark_file prefix) package
 
 let mark () =
   find_roots ();
   record_files ();
-  List.iter mark_package !packages
+  List.iter mark_package !roots
 
+let message file status =
+  if !verbose then
+    let code =
+      match status with
+      | Unmarked -> ' '
+      | Wrong_size -> '='
+      | Wrong_checksum -> '!'
+      | Valid -> assert false
+    in
+    print_char code;
+    print_char ' '
+  else
+    ();
+  print_endline file
+
+(* Remove a file along with any parent directories that have been emptied. *)
+
+let remove file =
+  Sys.remove file;
+  let dir = Filename.dirname file in
+  (* rmdir complains about non-directory (i.e. symlink) parents,
+     so we redirect its output to /dev/null *)
+  let cmd = sprintf "/bin/rmdir -p %s >/dev/null 2>&1" dir in
+  ignore (Sys.command cmd)
+
 let sweep () =
-  let garbage_collect file marked =
-    if not marked then
+  let gc file status =
+    if status <> Valid then
       begin
-	if !verbose then print_endline file;
-	if !remove then Sys.remove file
+	if not !quiet then message file status;
+	if not !keep then remove file
       end
   in
-  Hashtbl.iter garbage_collect files
+  iter_status gc
 
-let () =
+let garbage_collect () =
   mark ();
   sweep ()
+
+let () = garbage_collect ()

Modified: trunk/projects/approx/package.ml
===================================================================
--- trunk/projects/approx/package.ml	2005-04-24 17:29:27 UTC (rev 1307)
+++ trunk/projects/approx/package.ml	2005-04-24 21:10:56 UTC (rev 1308)
@@ -2,24 +2,6 @@
    Copyright (C) 2005  Eric C. Cooper <ecc@cmu.edu>
    Released under the GNU General Public License *)
 
-let uncompress file =
-  let command = Printf.sprintf "/bin/gzip --decompress --stdout %s" file in
-  Unix.open_process_in command
-
-let with_package_file file proc =
-  let input =
-    if Filename.check_suffix file ".gz" then
-      uncompress file
-    else
-      open_in file
-  in
-  try
-    proc input;
-    close_in input
-  with e ->
-    close_in input;
-    raise e
-
 let parse line =
   let rec lskip i =
     if line.[i] <> ' ' then i else lskip (i + 1)
@@ -34,9 +16,9 @@
   let info = String.sub line i (j - i + 1) in
   name, info
 
-let read_paragraph input =
+let read_paragraph chan =
   let rec loop lines =
-    let line = input_line input in
+    let line = input_line chan in
     if line = "" then lines
     else if line.[0] = ' ' then
       (* line with leading space should be concatenated with previous line
@@ -47,9 +29,9 @@
   in
   loop []  (* reverse order doesn't matter *)
 
-let read proc input =
+let read proc chan =
   let next () =
-    try Some (read_paragraph input)
+    try Some (read_paragraph chan)
     with End_of_file -> None
   in
   let rec loop () =
@@ -59,5 +41,26 @@
   in
   loop ()
 
+(* Decompress a file in place, if necessary, and return its name.
+   This saves time during future runs of gc_approx (at the expense of
+   some disk space) and detects corrupted Packages.gz files. *)
+
+let decompressed file =
+  if Filename.check_suffix file ".gz" then
+    let cmd = Printf.sprintf "/bin/gunzip %s" file in
+    if Sys.command cmd <> 0 then failwith "decompress";
+    Filename.chop_suffix file ".gz"
+  else
+    file
+
+let with_open_file file proc =
+  let chan = open_in file in
+  try
+    proc chan;
+    close_in chan
+  with e ->
+    close_in chan;
+    raise e
+
 let iter proc file =
-  with_package_file file (read proc)
+  with_open_file (decompressed file) (read proc)

Modified: trunk/projects/approx/util.ml
===================================================================
--- trunk/projects/approx/util.ml	2005-04-24 17:29:27 UTC (rev 1307)
+++ trunk/projects/approx/util.ml	2005-04-24 21:10:56 UTC (rev 1308)
@@ -23,4 +23,9 @@
 
 let implode_path = String.concat "/"
 
+let split_path path =
+  match explode_path path with
+  | "" :: head :: tail -> head, implode_path tail
+  | _ -> failwith ("split_path: " ^ path)
+
 let (^/) = Filename.concat

Modified: trunk/projects/approx/util.mli
===================================================================
--- trunk/projects/approx/util.mli	2005-04-24 17:29:27 UTC (rev 1307)
+++ trunk/projects/approx/util.mli	2005-04-24 21:10:56 UTC (rev 1308)
@@ -19,3 +19,8 @@
 (* Infix operator to concatenate two pathname components. *)
 
 val (^/) : string -> string -> string
+
+(* Split absolute path into top-level directory and rest of path.
+   Example: split_path "/a/b/c" = ("a", "b/c") *)
+
+val split_path : string -> string * string