r382 - /debtorrent/trunk/DebTorrent/HTTPCache.py

Fri Jun 20 03:27:06 UTC 2008

Author: camrdale-guest
Date: Fri Jun 20 03:27:06 2008
New Revision: 382

URL: http://svn.debian.org/wsvn/debtorrent/?sc=1&rev=382
Log:
HTTP Cache downloader can decompress while downloading.

Modified:
    debtorrent/trunk/DebTorrent/HTTPCache.py

Modified: debtorrent/trunk/DebTorrent/HTTPCache.py
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/trunk/DebTorrent/HTTPCache.py?rev=382&op=diff
==============================================================================

--- debtorrent/trunk/DebTorrent/HTTPCache.py (original)
+++ debtorrent/trunk/DebTorrent/HTTPCache.py Fri Jun 20 03:27:06 2008
@@ -27,6 +27,9 @@
 from os import utime, makedirs, listdir
 from time import strftime, strptime, gmtime
 from calendar import timegm
+from bz2 import BZ2Decompressor
+from zlib import decompressobj, MAX_WBITS
+from gzip import FCOMMENT, FEXTRA, FHCRC, FNAME, FTEXT
 import logging
 
 logger = logging.getLogger('DebTorrent.HTTPCache')
@@ -51,7 +54,7 @@
     
     """
     
-    def __init__(self, path, func, filename):
+    def __init__(self, path, func, filename, decompress = False):
         """Initialize the instance.
         
         @type path: C{list} of C{string}
@@ -60,12 +63,18 @@
         @param func: the method to call when the download completes
         @type filename: C{string}
         @param filename: the file to save the downloaded data to
+        @type decompress: C{boolean}
+        @param decompress: whether to decompress the file while downloading it
+            (optional, defaults to not decompressing)
         
         """
         
         self.path = path
         self.func = func
         self.filename = filename
+        self.decompress = decompress
+        self.gzfile = None
+        self.bz2file = None
         self.response = None
         
     def save_response(self, r):
@@ -85,10 +94,34 @@
             
             # Write the new file
             f = open(self.filename, 'wb')
+            if self.decompress and self.filename.endswith(".gz"):
+                self.gzheader = True
+                self.gzfile = open(self.filename[:-3], 'wb')
+                self.gzdec = decompressobj(-MAX_WBITS)
+            elif self.decompress and self.filename.endswith(".bz2"):
+                self.bz2file = open(self.filename[:-4], 'wb')
+                self.bz2dec = BZ2Decompressor()
+
             data = r.read(4096)
             while len(data) > 0:
                 f.write(data)
+                if self.gzfile:
+                    # Decompress the zlib portion of the file
+                    if self.gzheader:
+                        # Remove the gzip header junk
+                        self.gzheader = False
+                        new_data = self._remove_gzip_header(data)
+                        dec_data = self.gzdec.decompress(new_data)
+                    else:
+                        dec_data = self.gzdec.decompress(data)
+                    self.gzfile.write(dec_data)
+                elif self.bz2file:
+                    # Decompress the bz2 file
+                    dec_data = self.bz2dec.decompress(data)
+                    self.bz2file.write(dec_data)
+                    
                 data = r.read(4096)
+                
             f.close()
             r.close()
             
@@ -99,6 +132,32 @@
                 utime(self.filename, times)
             except:
                 logger.exception('Failed to set the cache time for the file')
+                times = None
+
+            # Close the decompressed file
+            if self.gzfile:
+                # Finish the decompression 
+                data_dec = self.gzdec.flush()
+                self.gzfile.write(data_dec)
+                self.gzfile.close()
+                self.gzfile = None
+
+                # Set the modified time (on error use current time which should work)
+                if times:
+                    try:
+                        utime(self.filename[:-3], times)
+                    except:
+                        logger.exception('Failed to set the cache time for the decompressed file')
+            elif self.bz2file:
+                self.bz2file.close()
+                self.bz2file = None
+
+                # Set the modified time (on error use current time which should work)
+                if times:
+                    try:
+                        utime(self.filename[:-4], times)
+                    except:
+                        logger.exception('Failed to set the cache time for the decompressed file')
         else:
             data = r.read()
         
@@ -115,6 +174,43 @@
                 
         self.response = (r.status, r.reason, headers, data)
         
+    def _remove_gzip_header(self, data):
+        """Return data with its leading gzip member header stripped off."""
+        # Check the magic number and compression method, then fetch the flags
+        if data[:2] != '\037\213':
+            raise IOError, 'Not a gzipped file'
+        if ord(data[2]) != 8:
+            raise IOError, 'Unknown compression method'
+        flag = ord(data[3])
+        # Bytes 4-9 (modtime, extra flags, OS) are never inspected; starting
+        # skip at 10 below steps over the whole fixed-size part of the header.
+        # NOTE(review): assumes the full header lies within data -- confirm.
+
+        skip = 10
+        if flag & FEXTRA:
+            # Skip the extra field: 2-byte little-endian length + that many bytes
+            xlen = ord(data[10])
+            xlen = xlen + 256*ord(data[11])
+            skip = skip + 2 + xlen
+        if flag & FNAME:
+            # Skip the filename up to and including its terminating NUL
+            while True:
+                if not data[skip] or data[skip] == '\000':
+                    break
+                skip += 1
+            skip += 1
+        if flag & FCOMMENT:
+            # Skip the comment up to and including its terminating NUL
+            while True:
+                if not data[skip] or data[skip] == '\000':
+                    break
+                skip += 1
+            skip += 1
+        if flag & FHCRC:
+            skip += 2     # Step over the 16-bit header CRC
+
+        return data[skip:]
+
     def error(self, error_msg):
         """Save an error response.
         
@@ -322,13 +418,16 @@
         self.downloads = {}
         self.cachedir = cachedir
 
-    def download_get(self, path, func):
+    def download_get(self, path, func, decompress = False):
         """Create a new download from a site.
         
         @type path: C{list} of C{string}
         @param path: the server and path to download
         @type func: C{method}
         @param func: the method to call with the data when the download is complete
+        @type decompress: C{boolean}
+        @param decompress: whether to decompress the file while downloading it
+            (optional, defaults to not decompressing)
         
         """
         
@@ -337,7 +436,7 @@
             self.downloads[path[0]] = CacheConnection(self, path[0])
 
         filename = self.get_filename(path)
-        if not self.downloads[path[0]].queue(CacheRequest(path, func, filename)):
+        if not self.downloads[path[0]].queue(CacheRequest(path, func, filename, decompress)):
             func(path, (500, 'Internal Server Error', 
                         {'Server': VERSION, 
                          'Content-Type': 'text/html; charset=iso-8859-1'},