r336 - in /debtorrent/trunk: ./ DebTorrent/BT1/FileSelector.py DebTorrent/SocketHandler.py docs/epydoc.config hippy.py uniquely.py

camrdale-guest at users.alioth.debian.org camrdale-guest at users.alioth.debian.org
Sat Jan 19 22:08:05 UTC 2008


Author: camrdale-guest
Date: Sat Jan 19 22:08:05 2008
New Revision: 336

URL: http://svn.debian.org/wsvn/debtorrent/?sc=1&rev=336
Log:
Merged revisions 273-274 via svnmerge from 
svn+ssh://camrdale-guest@svn.debian.org/svn/debtorrent/debtorrent/branches/unique

........
  r273 | camrdale-guest | 2007-08-19 16:23:10 -0700 (Sun, 19 Aug 2007) | 1 line
  
  Update hippy to save old sub-piece data for use with out-of-date mirrors.
........
  r274 | camrdale-guest | 2007-08-19 16:28:15 -0700 (Sun, 19 Aug 2007) | 1 line
  
  Fix some documentation errors.
........

Modified:
    debtorrent/trunk/   (props changed)
    debtorrent/trunk/DebTorrent/BT1/FileSelector.py
    debtorrent/trunk/DebTorrent/SocketHandler.py
    debtorrent/trunk/docs/epydoc.config
    debtorrent/trunk/hippy.py
    debtorrent/trunk/uniquely.py

Propchange: debtorrent/trunk/
------------------------------------------------------------------------------
--- svnmerge-integrated (original)
+++ svnmerge-integrated Sat Jan 19 22:08:05 2008
@@ -1,1 +1,1 @@
-/debtorrent/branches/http1.1:1-257 /debtorrent/branches/unique:1-204,209-213,216-217,219,222-225,227,229-236
+/debtorrent/branches/http1.1:1-257 /debtorrent/branches/unique:1-204,209-213,216-217,219,222-225,227,229-236,273-274

Modified: debtorrent/trunk/DebTorrent/BT1/FileSelector.py
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/trunk/DebTorrent/BT1/FileSelector.py?rev=336&op=diff
==============================================================================
--- debtorrent/trunk/DebTorrent/BT1/FileSelector.py (original)
+++ debtorrent/trunk/DebTorrent/BT1/FileSelector.py Sat Jan 19 22:08:05 2008
@@ -143,8 +143,8 @@
     def init_priorities(self, init_priority):
         """Initialize the priorities of all the files from the unpickled state.
         
-        @type new_priority: C{list} of C{int}
-        @param new_priority: the new file priorities
+        @type init_priority: C{list} of C{int}
+        @param init_priority: the new file priorities
         @rtype: C{boolean}
         @return: whether the initialization was successful
         

Modified: debtorrent/trunk/DebTorrent/SocketHandler.py
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/trunk/DebTorrent/SocketHandler.py?rev=336&op=diff
==============================================================================
--- debtorrent/trunk/DebTorrent/SocketHandler.py (original)
+++ debtorrent/trunk/DebTorrent/SocketHandler.py Sat Jan 19 22:08:05 2008
@@ -65,9 +65,9 @@
         @param sock: the socket to manage
         @type handler: unknown
         @param handler: the handler to use for all communications on the socket
-        @type ip: C{string}
-        @param ip: the IP address to use if one can't be obtained from the socket
-            (optional, defaults to 'unknown')
+        @type dns: (C{string}, C{int})
+        @param dns: the IP address and port to use if one can't be obtained
+            from the socket (optional, defaults to 'unknown')
         
         """
         

Modified: debtorrent/trunk/docs/epydoc.config
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/trunk/docs/epydoc.config?rev=336&op=diff
==============================================================================
--- debtorrent/trunk/docs/epydoc.config (original)
+++ debtorrent/trunk/docs/epydoc.config Sat Jan 19 22:08:05 2008
@@ -1,9 +1,9 @@
 [epydoc] # Epydoc section marker (required by ConfigParser)
 
-# The list of objects to document.  Objects can be named using
+# The list of objects to document. Objects can be named using
 # dotted names, module filenames, or package directory names.
 # Alases for this option include "objects" and "values".
-modules: DebTorrent btcompletedir.py btcopyannounce.py btmakemetafile.py btreannounce.py btrename.py btsetdebmirrors.py btshowmetainfo.py debtorrent-client.py debtorrent-tracker.py
+modules: DebTorrent btcompletedir.py btcopyannounce.py btmakemetafile.py btreannounce.py btrename.py btsetdebmirrors.py btshowmetainfo.py debtorrent-client.py debtorrent-tracker.py hippy.py uniquely.py
 
 # The type of output that should be generated.  Should be one
 # of: html, text, latex, dvi, ps, pdf.

Modified: debtorrent/trunk/hippy.py
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/trunk/hippy.py?rev=336&op=diff
==============================================================================
--- debtorrent/trunk/hippy.py (original)
+++ debtorrent/trunk/hippy.py Sat Jan 19 22:08:05 2008
@@ -1,14 +1,76 @@
 #!/usr/bin/env python
 
-"""Calculate the sub-piece hashes for large package files."""
+"""Calculate the sub-piece hashes for large package files.
+
+Run this script in the directory where the extrapieces files are to be stored.
+Its only command line argument is the Berkeley database containing the cached
+data from previous runs. Pass the paths of Release files to process into the
+standard input.
+
+For example::
+
+    find /var/www/debian -maxdepth 3 -name "Release" | hippy ../hippycache.bdb
+
+"""
 
 import bsddb, sha, binascii
-import os, sys
+import os, sys, gzip
 import struct
+from bz2 import BZ2File
 from math import ceil
 
 MAX_PIECE_SIZE = 512*1024
 CHUNK_SIZE = 16*1024
+
+# The Packages files to read
+EXTENSION = ".gz"
+
+def read_release(filename):
+    """Read the headers and Packages file names from a Release file.
+    
+    @type filename: C{string}
+    @param filename: the Release file to read
+    @rtype: C{dictionary}, C{list} of C{string}
+    @return: the headers and full file names of Packages files
+    
+    """
+
+    # Initialize the Release file variables
+    read_packages = False
+    headers = {}
+    packages = []
+    
+    f = open(filename, 'r')
+    
+    for line in f:
+        line = line.rstrip()
+
+        if line[:1] != " ":
+            read_packages = False
+            try:
+                # Read the various headers from the file
+                h, v = line.split(":", 1)
+                if h == "MD5Sum" or h == "SHA1" or h == "SHA256":
+                    read_packages = True
+                elif len(v) > 0:
+                    headers[h] = v[1:]
+            except:
+                # Bad header line, just ignore it
+                print "WARNING: Ignoring badly formatted Release line:", line
+
+            # Skip to the next line
+            continue
+        
+        # Read file names from the multiple hash sections of the file
+        if read_packages:
+            p = line.split()
+            if len(p) == 3 and p[2].endswith("Packages"+EXTENSION):
+                if p[2] not in packages:
+                    packages.append(p[2])
+    
+    f.close()
+    
+    return headers, packages
 
 def hash(file, piece_size):
     """Read a file and hash it's sub-pieces.
@@ -63,8 +125,53 @@
     n = 1 + size / MAX_PIECE_SIZE
     return max(MAX_PIECE_SIZE/2, int(ceil((float(size)/n)/CHUNK_SIZE))*CHUNK_SIZE)
 
+def cache2list(cache_value):
+    """Convert a cache value to a list of package names.
+    
+    The cache is stored as a string. The list is a repeating sequence of one
+    byte length followed by a string of that length. Therefore, the longest
+    string that can be stored is 255 bytes long.
+    
+    @type cache_value: C{string}
+    @param cache_value: the cached value for this file
+    @rtype: C{list} of C{string}
+    @return: the list of package names stored in the cache
+    
+    """
+
+    if cache_value == "":
+        return []
+
+    deb_list = []
+    while len(cache_value) > 0:
+        length = ord(cache_value[0])
+        deb = cache_value[1:length+1]
+        cache_value = cache_value[length+1:]
+        deb_list.append(deb)
+        
+    return deb_list
+
+def list2cache(deb_list):
+    """Convert a list of package names to a cacheable value.
+    
+    @type deb_list: C{list} of C{string}
+    @param deb_list: the package names to create a cache value for
+    @rtype: C{string}
+    @return: the cacheable string
+    
+    """
+    
+    if not deb_list:
+        return ""
+    
+    cache_value = ""
+    for deb in deb_list:
+        assert len(deb) < 256
+        cache_value += chr(len(deb)) + deb
+    return cache_value
+
 def cache2hash(cache_value):
-    """Convert a list of sub-piece hashes to a cacheable value.
+    """Convert a cache value to a list of sub-piece hashes.
     
     The cache is stored as a string. The first 20 bytes are the SHA1 hash of
     the entire file. Then there are repeating 24 byte sequences, the first 4
@@ -117,50 +224,144 @@
         cache_value += struct.pack(">i", length) + binascii.a2b_hex(hash)
     return cache_value
 
-def sub_piece(cache, filename):
+def sub_piece(filename):
     """Calculate and print the sub-pieces for a single file.
+    
+    @type filename: C{string}
+    @param filename: the file to calculate sub pieces for
+    
+    """
+    
+    filename = filename.rstrip()
+    
+    # Get the size of the file
+    size = os.stat(filename).st_size
+    
+    if size <= MAX_PIECE_SIZE:
+        # No sub-pieces are needed for this file
+        sha1 = ""
+        piece_list = []
+    else:
+        # Calculate all the sub-piece hashes
+        piece_size = optimal_piece_size(size)
+        file = open(filename)
+        sha1, piece_list = hash(file, piece_size)
+        file.close()
+        
+    return sha1, piece_list
+
+def get_packages(filename):
+    """Read the package file names listed in a Packages file.
+    
+    @type filename: C{string}
+    @param filename: the Packages file to open and parse
+    @rtype: C{list} of C{string}
+    @return: the package files listed in the Packages file
+    
+    """
+
+    # Open the possibly compressed file
+    if filename.endswith(".gz"):
+        f = gzip.open(filename, 'r')
+    elif filename.endswith(".bz2"):
+        f = BZ2File(filename, "r")
+    else:
+        f = open(filename, 'r')
+
+    debs = []
+    
+    p = [None]
+    for line in f:
+        line = line.rstrip()
+
+        if line == "":
+            if p[0]:
+                debs.append(p[0])
+            p = [None]
+        if line[:9] == "Filename:":
+            p[0] = line[10:]
+    
+    f.close()
+    
+    return debs
+
+def run(cache, releasefile):
+    """Process a single Release file.
     
     @type cache: C{bsddb.BTree}
     @param cache: an already opened bDB b-tree
-    @type filename: C{String}
-    @param filename: the file to calculate sub pieces for
-    
-    """
-    
-    filename = filename.rstrip()
-    
-    # Check if this file's sub-pieces are already known
-    fnkey = filename + ":pc"
-    if cache.has_key(fnkey):
-        # Use the cached result
-        sha1, piece_list = cache2hash(cache[fnkey])
-    else:
-        # Get the size fo the file
-        size = os.stat(filename).st_size
-        
-        if size <= MAX_PIECE_SIZE:
-            # No sub-pieces are needed for this file
-            cache_value = ""
-            piece_list = []
+    @type releasefile: C{string}
+    @param releasefile: the Release file to process
+
+    """
+    
+    # Process the Release file
+    print "Processing: %s" % releasefile
+    root_dir = releasefile[:releasefile.index('/dists/')+1]
+    release_dir = releasefile[len(root_dir):].rsplit('/', 1)[0] + '/'
+    release_headers, packages = read_release(releasefile)
+    
+    file_prefix = "dists_" + release_headers.get("Codename", "") + "_"
+    file_suffix = "_Packages-extrapieces.gz"
+    
+    for packages_file in packages:
+        sub_filename = file_prefix + '_'.join(packages_file.split('/')[:-1]) + file_suffix
+        pkey = release_dir + packages_file + ":pl"
+        
+        # Get the list of packages in the packages file
+        debs = get_packages(root_dir + release_dir + packages_file)
+        
+        # Retrieve the saved list of sub-pieced packages in the Packages file
+        if cache.has_key(pkey):
+            packages_list = cache2list(cache[pkey])
         else:
-            # Calculate all the sub-piece hashes
-            piece_size = optimal_piece_size(size)
-            file = open(filename)
-            sha1, piece_list = hash(file, piece_size)
-            cache_value = hash2cache(sha1, piece_list)
-            file.close()
+            packages_list = []
+        all_debs = {}.fromkeys(packages_list, 1)
+
+        # First, sub-piece any new package files
+        for deb in debs:
+            filename = root_dir + deb
+            fnkey = deb + ":pc"
+
+            # Check if this file's sub-pieces are already known
+            if cache.has_key(fnkey):
+                sha1, piece_list = cache2hash(cache[fnkey])
+            else:
+                print '    Hashing new package:', deb
+                sha1, piece_list = sub_piece(filename)
+
+                # Save the result for next time
+                cache[fnkey] = hash2cache(sha1, piece_list)
+                
+            # If it has sub-pieces, save it to the list
+            if piece_list:
+                all_debs[deb] = 1
+        
+        # Write the list back to the cache
+        packages_list = all_debs.keys()
+        packages_list.sort()
+        cache[pkey] = list2cache(packages_list)
+
+        # Write the sub-piece data to the file
+        sub_file = gzip.open(sub_filename, 'w')
+        for deb in packages_list:
+            fnkey = deb + ":pc"
             
-        # Save the result for next time
-        cache[fnkey] = cache_value
-
-    if piece_list:
-        # Print the resulting sub-piece hashes
-        print "Filename: %s" % (filename)
-        print "SHA1: %s" % (sha1)
-        print "SHA1-Pieces:"
-        for x in piece_list:
-            print " %s %d" % x
-        print ""
+            # Check to make sure the sub-piece data is in the cache
+            if cache.has_key(fnkey):
+                # Get the cached result
+                sha1, piece_list = cache2hash(cache[fnkey])
+                
+                # Print the resulting sub-piece hashes
+                sub_file.write("Filename: %s\n" % (deb))
+                sub_file.write("SHA1: %s\n" % (sha1))
+                sub_file.write("SHA1-Pieces:\n")
+                for x in piece_list:
+                    sub_file.write(" %s %d\n" % x)
+                sub_file.write("\n")
+            else:
+                print "WARNING: no sub-piece data found for " + deb
+        sub_file.close()
 
 if __name__ == '__main__':
     
@@ -168,9 +369,10 @@
     cache_file = sys.argv[1]
     cache = bsddb.btopen(cache_file, "w")
 
-    # Read files to sub-piece from standard in
+    # Read Release file names from standard in
     for filename in sys.stdin:
-        sub_piece(cache, filename)
+        filename = filename.rstrip()
+        run(cache, filename)
 
     # Close the cache file
     cache.sync()

Modified: debtorrent/trunk/uniquely.py
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/trunk/uniquely.py?rev=336&op=diff
==============================================================================
--- debtorrent/trunk/uniquely.py (original)
+++ debtorrent/trunk/uniquely.py Sat Jan 19 22:08:05 2008
@@ -33,7 +33,7 @@
 def read_release(filename):
     """Read the headers and Packages file names from a Release file.
     
-    @type filename: C[string}
+    @type filename: C{string}
     @param filename: the Release file to read
     @rtype: C{dictionary}, C{list} of C{string}
     @return: the headers and full file names of Packages files
@@ -81,7 +81,7 @@
 def get_old(old_file):
     """Read the headers and piece ordering data from an old file.
     
-    @type old_file: C[string}
+    @type old_file: C{string}
     @param old_file: the old piece ordering file to open
     @rtype: C{dictionary}, C{dictionary}
     @return: the old piece ordering (keys are the file names, values are the
@@ -189,7 +189,7 @@
     piece ordering. This is done by modifying the input old_all_files,
     all_pieces, and all_new_pieces variables.
     
-    @type filename: C[string}
+    @type filename: C{string}
     @param filename: the Packages file to open and parse
     @type old_files: C{dictionary}
     @param old_files: the original piece ordering, keys are the file names,
@@ -301,7 +301,7 @@
 def write_file(filename, pieces, headers):
     """Print the new data to the file.
     
-    @type filename: C[string}
+    @type filename: C{string}
     @param filename: the file to write to
     @type pieces: C{dictionary}
     @param pieces: the current piece ordering, keys are the starting piece
@@ -331,7 +331,7 @@
 def run(releasefile):
     """Process a single Release file.
     
-    @type releasefile: C[string}
+    @type releasefile: C{string}
     @param releasefile: the Release file to process
 
     """




More information about the Debtorrent-commits mailing list