r379 - /debtorrent/trunk/hippy.py

camrdale-guest at users.alioth.debian.org camrdale-guest at users.alioth.debian.org
Wed May 21 22:07:19 UTC 2008


Author: camrdale-guest
Date: Wed May 21 22:07:18 2008
New Revision: 379

URL: http://svn.debian.org/wsvn/debtorrent/?sc=1&rev=379
Log:
add ability for hippy to clean its cache of old entries

Modified:
    debtorrent/trunk/hippy.py

Modified: debtorrent/trunk/hippy.py
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/trunk/hippy.py?rev=379&op=diff
==============================================================================
--- debtorrent/trunk/hippy.py (original)
+++ debtorrent/trunk/hippy.py Wed May 21 22:07:18 2008
@@ -208,8 +208,6 @@
     @param filename: the file to calculate sub pieces for
     
     """
-    sys.stderr.write('        Hashing: %s\n' % filename)
-    
     # Get the size of the file
     size = os.stat(filename).st_size
     
@@ -218,6 +216,8 @@
         sha1 = ""
         piece_list = []
     else:
+        sys.stderr.write('        Hashing: %s\n' % filename)
+    
         # Calculate all the sub-piece hashes
         piece_size = optimal_piece_size(size)
         file = open(filename)
@@ -312,6 +312,48 @@
     sys.stderr.write('    Saving %d new files\n' % len(new_debs))
     cache[pkey] = list2cache(new_debs)
 
+def clean(cache):
+    """Delete stale ':pc' hash entries from the cache, reporting to stderr.
+    
+    @type cache: C{bsddb.BTree}
+    @param cache: an already opened bDB b-tree; matching ':pc' keys are
+        deleted from it in place (syncing/closing is left to the caller)
+    """
+    sys.stderr.write('Cleaning the cache\n')
+
+    # Scan all keys: ':pl' entries name the wanted files, ':pc' are hashes
+    packages = set()
+    files = set()
+    unused_hashes = set()
+    empty_hashes = 0
+    for key in cache.keys():
+        if key.endswith(':pl'):
+            d = cache2list(cache[key])
+            packages.update(set(d.keys()))
+        elif key.endswith(':pc'):
+            if cache[key]:
+                files.add(key[:-3])  # drop the ':pc' suffix to get the name
+            else:
+                empty_hashes += 1
+                if not os.path.exists(key[:-3]):  # empty entry, file is gone
+                    unused_hashes.add(key[:-3])
+
+    # Add non-empty hashes that no ':pl' entry names, and report the tallies
+    old_empty_hashes = len(unused_hashes)
+    sys.stderr.write('Found %d/%d empty hashes for old files\n' %
+                    (old_empty_hashes, empty_hashes))
+    unused_hashes.update(files.difference(packages))
+    missing_hashes = packages.difference(files)
+    sys.stderr.write('Found %d/%d hashes that are no longer needed\n' %
+                     (len(unused_hashes) - old_empty_hashes, len(files)))
+    sys.stderr.write('Found %d/%d hashes that are missing\n' %
+                     (len(missing_hashes), len(packages)))
+
+    # Delete the unused ':pc' cache entries (files on disk are not touched)
+    for file in unused_hashes:
+        sys.stderr.write('Removing file: %s\n' % (file, ))
+        del cache[file + ':pc']
+
 if __name__ == '__main__':
     
     # Open the cache file specified on the command line
@@ -321,7 +363,10 @@
     # Get the Packages file name being processed
     pkg_file = sys.argv[2]
     
-    run(cache, pkg_file)
+    if pkg_file == "CLEAN":
+        clean(cache)
+    else:
+        run(cache, pkg_file)
 
     # Close the cache file
     cache.sync()




More information about the Debtorrent-commits mailing list