[Pkg-ocaml-maint-commits] r5812 - in /trunk/projects/approx/trunk: approx.ml debian/changelog doc/README.concurrency server.ml util.ml

ecc-guest at users.alioth.debian.org ecc-guest at users.alioth.debian.org
Sat Jun 28 18:33:47 UTC 2008


Author: ecc-guest
Date: Sat Jun 28 18:33:46 2008
New Revision: 5812

URL: http://svn.debian.org/wsvn/?sc=1&rev=5812
Log:
fix race condition when opening cache files

use hints to avoid duplicate downloads

update concurrency document to reflect new scheme

fix local file URLs

Modified:
    trunk/projects/approx/trunk/approx.ml
    trunk/projects/approx/trunk/debian/changelog
    trunk/projects/approx/trunk/doc/README.concurrency
    trunk/projects/approx/trunk/server.ml
    trunk/projects/approx/trunk/util.ml

Modified: trunk/projects/approx/trunk/approx.ml
URL: http://svn.debian.org/wsvn/trunk/projects/approx/trunk/approx.ml?rev=5812&op=diff
==============================================================================
--- trunk/projects/approx/trunk/approx.ml (original)
+++ trunk/projects/approx/trunk/approx.ml Sat Jun 28 18:33:46 2008
@@ -38,29 +38,30 @@
 
 let stat_file name = try Some (stat name) with Unix_error _ -> None
 
-(* Temporary name in case the download is interrupted *)
-
-let in_progress name = name ^ ".tmp"
+(* Hint that a download is in progress *)
+
+let in_progress name = name ^ ".hint"
 
 let wait_for_download_in_progress name =
-  let name' = in_progress name in
-  let rec wait n prev =
-    match stat_file name' with
-    | Some { st_size = cur } ->
-        if cur = prev && n = max_wait then begin
+  let hint = in_progress name in
+  let timeout = float_of_int max_wait in
+  let rec wait n =
+    match stat_file hint with
+    | Some { st_mtime = mtime } ->
+        if time () -. mtime > timeout then begin
           error_message "Concurrent download of %s is taking too long" name;
-          (* remove the other process's .tmp file if it still exists,
+          (* remove the other process's hint file if it still exists,
              so we can create our own *)
-          rm name'
+          rm hint
         end else begin
-          if prev = 0L then
+          if n = 0 then
             debug_message "Waiting for concurrent download of %s" name;
           sleep 1;
-          wait (if cur = prev then n + 1 else 0) cur
+          wait (n + 1)
         end
     | None -> ()
   in
-  wait 0 0L
+  wait 0
 
 let debug_headers msg headers =
   debug_message "%s" msg;
@@ -83,7 +84,6 @@
   debug_headers "Local response" env#output_header_fields;
   Done (`File (`Ok, None, cache_dir ^/ name, 0L, size))
 
-let not_found = Done (`Std_response (`Not_found, None, None))
 let not_modified = Done (`Std_response (`Not_modified, None, None))
 
 let cache_hit name ims mod_time env =
@@ -97,6 +97,8 @@
     end
   else Missing
 
+let not_found = Done (`Std_response (`Not_found, None, None))
+
 let deny name =
   debug_message "Denying %s" name;
   not_found
@@ -144,10 +146,11 @@
   | "" :: dirs -> loop "/" dirs
   | dirs -> loop "." dirs
 
-let create_file path =
-  make_directory (Filename.dirname path);
-  (* open file exclusively so we don't conflict with a concurrent download *)
-  open_out_excl path
+let create_hint name =
+  make_directory (Filename.dirname name);
+  close (openfile (in_progress name) [O_CREAT; O_WRONLY] 0o644)
+
+let remove_hint name = rm (in_progress name)
 
 type cache_info = { file : string; tmp_file : string; chan : out_channel }
 
@@ -169,8 +172,9 @@
   end else
     try
       debug_message "  open cache %s" file;
-      let tmp_file = in_progress file in
-      let chan = create_file tmp_file in
+      make_directory (Filename.dirname file);
+      let tmp_file = gensym file in
+      let chan = open_out_excl tmp_file in
       Cache { file = file; tmp_file = tmp_file; chan = chan }
     with e ->
       error_message "Cannot cache %s" file;
@@ -378,7 +382,13 @@
     | Url.FTP | Url.FILE -> download_ftp
   in
   let resp = new_response url name in
-  try dl resp url name ims cgi
+  let download_with_hint () =
+    create_hint name;
+    unwind_protect
+      (fun () -> dl resp url name ims cgi)
+      (fun () -> remove_hint name)
+  in
+  try download_with_hint ()
   with e ->
     remove_cache resp.cache;
     if e <> Failure url then info_message "%s" (string_of_exception e);

Modified: trunk/projects/approx/trunk/debian/changelog
URL: http://svn.debian.org/wsvn/trunk/projects/approx/trunk/debian/changelog?rev=5812&op=diff
==============================================================================
--- trunk/projects/approx/trunk/debian/changelog (original)
+++ trunk/projects/approx/trunk/debian/changelog Sat Jun 28 18:33:46 2008
@@ -1,3 +1,13 @@
+approx (3.3.0) unstable; urgency=low
+
+  * Allow empty host name in file URLs (closes: #479022)
+  * Use unique temporary names to avoid race condition
+    when opening cache files (closes: #488080)
+  * Use hint files to avoid (but not eliminate) duplicate downloads
+  * Remove unnecessary getpeername call in server loop
+
+ -- Eric Cooper <ecc at cmu.edu>  Sat, 03 May 2008 16:43:25 -0400
+
 approx (3.2.0) unstable; urgency=low
 
   * Listen on both IPv4 and IPv6 sockets when available

Modified: trunk/projects/approx/trunk/doc/README.concurrency
URL: http://svn.debian.org/wsvn/trunk/projects/approx/trunk/doc/README.concurrency?rev=5812&op=diff
==============================================================================
--- trunk/projects/approx/trunk/doc/README.concurrency (original)
+++ trunk/projects/approx/trunk/doc/README.concurrency Sat Jun 28 18:33:46 2008
@@ -1,7 +1,7 @@
 Concurrency control issues in approx
 
     Eric Cooper <ecc at cmu.edu>
-    2005 May 13
+    June 2008
 
 The priorities for dealing with concurrency in approx are:
 
@@ -10,7 +10,7 @@
 
 2. maintain good performance (minimize delays due to serialization,
    minimize number of downloads from remote repositories)
-   
+
 There are two sources of potential conflicts:
 
 A: between approx and gc_approx
@@ -21,12 +21,12 @@
 them from the cache.  While this is not fatal (it's only a cache,
 after all), it would have a severe performance impact.
 
-But approx downloads partial files with a ".tmp" extension, and only
-renames them upon successful completion.  And since the rename is
+But approx downloads partial files with a unique temporary extension, and
+only renames them upon successful completion.  And since the rename is
 atomic, this potential conflict is a non-problem.
 
-Another conflict can occur if gc_approx deletes a *.tmp file that is
-in the process of being downloaded on behalf of an approx client.
+Another conflict can occur if gc_approx deletes a temporary file that
+is in the process of being downloaded on behalf of an approx client.
 To avoid this, gc_approx doesn't delete recently-modified files.
 
 B: between concurrent approx processes
@@ -36,11 +36,14 @@
 
 But suppose a large download, like the main Packages.gz file, is in
 progress.  Another approx process might decide to download it also.
-To avoid this, we use the presence of a current ".tmp" version of the
+To avoid this, approx creates a hint file in the cache directory
+before starting the download.  We use the presence of a current hint
 file as an indication that the download is already in progress, and
 wait for it (at least for a while).
 
-When a requested file is not in the cache, it might be possible for two
-approx processes to attempt to download it simultaneously, overwriting
-one another's .tmp file.  To prevent this, approx opens the *.tmp
-cache file with the O_EXCL flag.
+There is still a race condition between the time that an approx
+process checks for the non-existence of a hint file and the time it
+creates it, so it is possible for two approx processes to download the
+same file simultaneously.  The atomic rename of unique temporary files
+ensures correctness in this case, at the cost of some network
+bandwidth.

Modified: trunk/projects/approx/trunk/server.ml
URL: http://svn.debian.org/wsvn/trunk/projects/approx/trunk/server.ml?rev=5812&op=diff
==============================================================================
--- trunk/projects/approx/trunk/server.ml (original)
+++ trunk/projects/approx/trunk/server.ml Sat Jun 28 18:33:46 2008
@@ -81,8 +81,8 @@
 
 let loop sockets service =
   let process sock =
-    let fd, _ = accept sock in
-    let address = remote_address (getpeername fd) ~with_port: false in
+    let fd, ip = accept sock in
+    let address = remote_address ip ~with_port: false in
     if Tcp_wrappers.hosts_ctl Version.name ~address then
       match fork () with
       | 0 ->

Modified: trunk/projects/approx/trunk/util.ml
URL: http://svn.debian.org/wsvn/trunk/projects/approx/trunk/util.ml?rev=5812&op=diff
==============================================================================
--- trunk/projects/approx/trunk/util.ml (original)
+++ trunk/projects/approx/trunk/util.ml Sat Jun 28 18:33:46 2008
@@ -60,7 +60,7 @@
     if path.[0] = '/' then relative_path path
     else
       let i = String.index path ':' in
-      if path.[i + 1] = '/' && path.[i + 2] = '/' && path.[i + 3] <> '/' then
+      if path.[i + 1] = '/' && path.[i + 2] = '/' then
         let j = String.index_from path (i + 3) '/' in
         relative_path (substring path ~from: j)
       else




More information about the Pkg-ocaml-maint-commits mailing list