r382 - /debtorrent/trunk/DebTorrent/HTTPCache.py
camrdale-guest at users.alioth.debian.org
camrdale-guest at users.alioth.debian.org
Fri Jun 20 03:27:06 UTC 2008
Author: camrdale-guest
Date: Fri Jun 20 03:27:06 2008
New Revision: 382
URL: http://svn.debian.org/wsvn/debtorrent/?sc=1&rev=382
Log:
HTTP Cache downloader can decompress while downloading.
Modified:
debtorrent/trunk/DebTorrent/HTTPCache.py
Modified: debtorrent/trunk/DebTorrent/HTTPCache.py
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/trunk/DebTorrent/HTTPCache.py?rev=382&op=diff
==============================================================================
--- debtorrent/trunk/DebTorrent/HTTPCache.py (original)
+++ debtorrent/trunk/DebTorrent/HTTPCache.py Fri Jun 20 03:27:06 2008
@@ -27,6 +27,9 @@
from os import utime, makedirs, listdir
from time import strftime, strptime, gmtime
from calendar import timegm
+from bz2 import BZ2Decompressor
+from zlib import decompressobj, MAX_WBITS
+from gzip import FCOMMENT, FEXTRA, FHCRC, FNAME, FTEXT
import logging
logger = logging.getLogger('DebTorrent.HTTPCache')
@@ -51,7 +54,7 @@
"""
- def __init__(self, path, func, filename):
+ def __init__(self, path, func, filename, decompress = False):
"""Initialize the instance.
@type path: C{list} of C{string}
@@ -60,12 +63,18 @@
@param func: the method to call when the download completes
@type filename: C{string}
@param filename: the file to save the downloaded data to
+ @type decompress: C{boolean}
+ @param decompress: whether to decompress the file while downloading it
+ (optional, defaults to not decompressing)
"""
self.path = path
self.func = func
self.filename = filename
+ self.decompress = decompress
+ self.gzfile = None
+ self.bz2file = None
self.response = None
def save_response(self, r):
@@ -85,10 +94,34 @@
# Write the new file
f = open(self.filename, 'wb')
+ if self.decompress and self.filename.endswith(".gz"):
+ self.gzheader = True
+ self.gzfile = open(self.filename[:-3], 'wb')
+ self.gzdec = decompressobj(-MAX_WBITS)
+ elif self.decompress and self.filename.endswith(".bz2"):
+ self.bz2file = open(self.filename[:-4], 'wb')
+ self.bz2dec = BZ2Decompressor()
+
data = r.read(4096)
while len(data) > 0:
f.write(data)
+ if self.gzfile:
+ # Decompress the zlib portion of the file
+ if self.gzheader:
+ # Remove the gzip header junk
+ self.gzheader = False
+ new_data = self._remove_gzip_header(data)
+ dec_data = self.gzdec.decompress(new_data)
+ else:
+ dec_data = self.gzdec.decompress(data)
+ self.gzfile.write(dec_data)
+ elif self.bz2file:
+ # Decompress the bz2 file
+ dec_data = self.bz2dec.decompress(data)
+ self.bz2file.write(dec_data)
+
data = r.read(4096)
+
f.close()
r.close()
@@ -99,6 +132,32 @@
utime(self.filename, times)
except:
logger.exception('Failed to set the cache time for the file')
+ times = None
+
+ # Close the decompressed file
+ if self.gzfile:
+ # Finish the decompression
+ data_dec = self.gzdec.flush()
+ self.gzfile.write(data_dec)
+ self.gzfile.close()
+ self.gzfile = None
+
+ # Set the modified time (on error use current time which should work)
+ if times:
+ try:
+ utime(self.filename[:-3], times)
+ except:
+ logger.exception('Failed to set the cache time for the decompressed file')
+ elif self.bz2file:
+ self.bz2file.close()
+ self.bz2file = None
+
+ # Set the modified time (on error use current time which should work)
+ if times:
+ try:
+ utime(self.filename[:-4], times)
+ except:
+ logger.exception('Failed to set the cache time for the decompressed file')
else:
data = r.read()
@@ -115,6 +174,43 @@
self.response = (r.status, r.reason, headers, data)
+ def _remove_gzip_header(self, data):
+ """Remove the gzip header from the zlib compressed data."""
+ # Read, check & discard the header fields
+ if data[:2] != '\037\213':
+ raise IOError, 'Not a gzipped file'
+ if ord(data[2]) != 8:
+ raise IOError, 'Unknown compression method'
+ flag = ord(data[3])
+ # modtime = self.fileobj.read(4)
+ # extraflag = self.fileobj.read(1)
+ # os = self.fileobj.read(1)
+
+ skip = 10
+ if flag & FEXTRA:
+ # Read & discard the extra field
+ xlen = ord(data[10])
+ xlen = xlen + 256*ord(data[11])
+ skip = skip + 2 + xlen
+ if flag & FNAME:
+ # Read and discard a null-terminated string containing the filename
+ while True:
+ if not data[skip] or data[skip] == '\000':
+ break
+ skip += 1
+ skip += 1
+ if flag & FCOMMENT:
+ # Read and discard a null-terminated string containing a comment
+ while True:
+ if not data[skip] or data[skip] == '\000':
+ break
+ skip += 1
+ skip += 1
+ if flag & FHCRC:
+ skip += 2 # Read & discard the 16-bit header CRC
+
+ return data[skip:]
+
def error(self, error_msg):
"""Save an error response.
@@ -322,13 +418,16 @@
self.downloads = {}
self.cachedir = cachedir
- def download_get(self, path, func):
+ def download_get(self, path, func, decompress = False):
"""Create a new download from a site.
@type path: C{list} of C{string}
@param path: the server and path to download
@type func: C{method}
@param func: the method to call with the data when the download is complete
+ @type decompress: C{boolean}
+ @param decompress: whether to decompress the file while downloading it
+ (optional, defaults to not decompressing)
"""
@@ -337,7 +436,7 @@
self.downloads[path[0]] = CacheConnection(self, path[0])
filename = self.get_filename(path)
- if not self.downloads[path[0]].queue(CacheRequest(path, func, filename)):
+ if not self.downloads[path[0]].queue(CacheRequest(path, func, filename, decompress)):
func(path, (500, 'Internal Server Error',
{'Server': VERSION,
'Content-Type': 'text/html; charset=iso-8859-1'},
More information about the Debtorrent-commits
mailing list