r113 - in /debtorrent/trunk/DebTorrent: BT1/AptListener.py HTTPCache.py
camrdale-guest at users.alioth.debian.org
camrdale-guest at users.alioth.debian.org
Sat Jun 16 06:06:55 UTC 2007
Author: camrdale-guest
Date: Sat Jun 16 06:06:55 2007
New Revision: 113
URL: http://svn.debian.org/wsvn/debtorrent/?sc=1&rev=113
Log:
Returns to APT more HTTP status codes and sets better headers.
Modified:
debtorrent/trunk/DebTorrent/BT1/AptListener.py
debtorrent/trunk/DebTorrent/HTTPCache.py
Modified: debtorrent/trunk/DebTorrent/BT1/AptListener.py
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/trunk/DebTorrent/BT1/AptListener.py?rev=113&op=diff
==============================================================================
--- debtorrent/trunk/DebTorrent/BT1/AptListener.py (original)
+++ debtorrent/trunk/DebTorrent/BT1/AptListener.py Sat Jun 16 06:06:55 2007
@@ -467,13 +467,15 @@
return (200, 'OK', {'Server': VERSION, 'Content-Type': 'text/html; charset=iso-8859-1'}, """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n<html><head><title>Meow</title>\n</head>\n<body style="color: rgb(255, 255, 255); background-color: rgb(0, 0, 0);">\n<div><big style="font-weight: bold;"><big><big><span style="font-family: arial,helvetica,sans-serif;">I IZ TAKIN BRAKE</span></big></big></big><br></div>\n<pre><b><tt> .-o=o-.<br> , /=o=o=o=\ .--.<br> _|\|=o=O=o=O=| \<br> __.' a`\=o=o=o=(`\ /<br> '. a 4/`|.-""'`\ \ ;'`) .---.<br> \ .' / .--' |_.' / .-._)<br> `) _.' / /`-.__.' /<br> `'-.____; /'-.___.-'<br> `\"""`</tt></b></pre>\n<div><big style="font-weight: bold;"><big><big><span style="font-family: arial,helvetica,sans-serif;">FRM GETIN UR PACKAGES</span></big></big></big><br></div>\n</body>\n</html>""")
- def get_cached(self, connection, path):
+ def get_cached(self, connection, path, headers):
"""Proxy the (possibly cached) download of a file from a mirror.
@type connection: L{DebTorrent.HTTPHandler.HTTPConnection}
@param connection: the conection the request came in on
@type path: C{list} of C{string}
@param path: the path of the file to download, starting with the mirror name
+ @type headers: C{dictionary}
+ @param headers: the headers from the request
@rtype: (C{int}, C{string}, C{dictionary}, C{string})
@return: the HTTP status code, status message, headers, and downloaded file
(or None if the file is being downloaded)
@@ -487,11 +489,11 @@
uptodate = False
# First check the cache for the file
- data = self.Cache.cache_get(path, uptodate)
+ r = self.Cache.cache_get(path, uptodate, headers.get('if-modified-since', ''))
# TODO: watch out for getting cached deb's that haven't yet downloaded (with saveas_style 2)
-
+
# If the cache doesn't have it
- if data is None:
+ if r[0] not in (200, 304):
# Get Debs from the debtorrent download, others are straight download
if path[-1][-4:] == '.deb':
return self.get_package(connection, path)
@@ -502,9 +504,15 @@
return None
if path[-1] in ('Packages', 'Packages.gz', 'Packages.bz2'):
- self.got_Packages(path, data)
-
- return (200, 'OK', {'Server': VERSION, 'Content-Type': 'text/plain'}, data)
+ # TODO: check if the torrent is already running before this
+ if r[0] == 304:
+ # Oops, we do need the cached file after all to start the torrent
+ r2 = self.Cache.cache_get(path)
+ self.got_Packages(path, r2[3])
+ else:
+ self.got_Packages(path, r[3])
+
+ return r
except IOError, e:
try:
@@ -514,13 +522,15 @@
msg = 'Unknown error occurred'
return (status, 'Not Found', {'Server': VERSION, 'Content-Type': 'text/plain', 'Pragma': 'no-cache'}, msg)
- def get_cached_callback(self, path, data):
+ def get_cached_callback(self, path, r):
"""Return the newly cached file to the waiting connection.
@type path: C{list} of C{string}
@param path: the path of the file to download, starting with the mirror name
@type data: C{string}
@param data: the downloaded newly cached file
+ @type r: (C{int}, C{string}, C{dictionary}, C{string})
+ @param r: the HTTP status code, status message, headers, and cached data
"""
@@ -533,18 +543,15 @@
return
# If it's a torrent file, start it
- if data is not None and path[-1] in ('Packages', 'Packages.gz', 'Packages.bz2'):
- self.got_Packages(path, data)
+ if r[0] == 200 and path[-1] in ('Packages', 'Packages.gz', 'Packages.bz2'):
+ self.got_Packages(path, r[3])
for connection in connections:
# Check to make sure the requester is still waiting
if connection.closed:
continue
- if data is None:
- connection.answer((404, 'Not Found', {'Server': VERSION, 'Content-Type': 'text/plain', 'Pragma': 'no-cache'}, alas))
- else:
- connection.answer((200, 'OK', {'Server': VERSION, 'Content-Type': 'text/plain'}, data))
+ connection.answer(r)
def get_package(self, connection, path):
"""Download a package file from a torrent.
@@ -672,6 +679,7 @@
infohash = sha(bencode(response['info'])).digest()
+ # TODO: cleanup, all these aren't needed
a = {}
a['path'] = '/'.join(path)
a['file'] = name
@@ -790,7 +798,7 @@
if 'Packages.diff' in path:
return (404, 'Not Found', {'Server': VERSION, 'Content-Type': 'text/plain', 'Pragma': 'no-cache'}, alas)
- return self.get_cached(connection, path)
+ return self.get_cached(connection, path, headers)
except ValueError, e:
return (400, 'Bad Request', {'Server': VERSION, 'Content-Type': 'text/plain'},
Modified: debtorrent/trunk/DebTorrent/HTTPCache.py
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/trunk/DebTorrent/HTTPCache.py?rev=113&op=diff
==============================================================================
--- debtorrent/trunk/DebTorrent/HTTPCache.py (original)
+++ debtorrent/trunk/DebTorrent/HTTPCache.py Sat Jun 16 06:06:55 2007
@@ -7,16 +7,18 @@
@type VERSION: C{string}
@var VERSION: the UserAgent identifier sent to all sites
+@type alas: C{string}
+@var alas: the message to send when the data is not found
"""
-from httplib import HTTPConnection, BadStatusLine
+from httplib import HTTPConnection
from threading import Thread
from traceback import print_exc
from DebTorrent.__init__ import product_name,version_short
from os.path import join, split, getmtime, getsize, exists
from os import utime, makedirs, listdir
-from time import strptime, gmtime
+from time import strftime, strptime, gmtime
from calendar import timegm
try:
True
@@ -26,7 +28,9 @@
DEBUG = True
+time_format = '%a, %d %b %Y %H:%M:%S %Z'
VERSION = product_name+'/'+version_short
+alas = 'your file may exist elsewhere in the universe\nbut alas, not here\n'
class CacheRequest:
"""Download a file needed for the HTTP download cache.
@@ -44,23 +48,16 @@
@type connection: C{HTTPConnection}
@ivar connection: the connection to the HTTP server
@type headers: C{dictionary}
- @ivar headres: the HTTP headers to send in the request
- @type error: C{string}
- @ivar error: the error received from the server
- @type errorcount: C{int}
- @ivar errorcount: the number of download errors that have occurred since
- the last successful download from the site
+ @ivar headres: the HTTP headers to send in the request, and the headers
+ returned by the response
@type active: C{boolean}
@ivar active: whether there is a download underway
- @type cancelled: C{boolean}
- @ivar cancelled: whether the download has been cancelled
@type received_data: C{string}
- @ivar received_data: the data returned from the most recent request
+ @ivar received_data: the data returned from the server
@type connection_status: C{int}
- @ivar connection_status: the status code returned by the server for the
- most recent request
- @type last_modified: C{string}
- @ivar last_modified: the Last-Modified HTTP header from the request
+ @ivar connection_status: the status code returned by the server
+ @type connection_response: C{string}
+ @ivar connection_status: the status message returned by the server
"""
@@ -89,10 +86,7 @@
return
self.headers = {'User-Agent': VERSION}
- self.error = None
- self.errorcount = 0
self.active = False
- self.cancelled = False
if DEBUG:
print 'CacheRequest: downloading ', self.url
rq = Thread(target = self._request)
@@ -106,72 +100,39 @@
import encodings.punycode
import encodings.idna
- self.error = None
- self.received_data = None
try:
if DEBUG:
print 'CacheRequest: sending request'
print 'GET', self.url, self.headers
self.connection.request('GET',self.url, None, self.headers)
- # Check for closed persistent connection due to server timeout
- try:
- r = self.connection.getresponse()
- except BadStatusLine:
- # Reopen the connection to get a new socket
- self.connection.close()
- self.connection.connect()
- self.connection.request('GET',self.url, None, self.headers)
- r = self.connection.getresponse()
+ r = self.connection.getresponse()
if DEBUG:
print 'CacheRequest: got response'
print r.status, r.reason, r.getheaders()
self.connection_status = r.status
- self.last_modified = r.getheader('last-modified')
+ self.connection_response = r.reason
+ self.headers = dict(r.getheaders())
self.received_data = r.read()
except Exception, e:
if DEBUG:
print 'error accessing http server: '+str(e)
print_exc()
- self.error = 'error accessing http server: '+str(e)
- try:
- self.connection.close()
- except:
- pass
- try:
- self.connection = HTTPConnection(self.server)
- except:
- self.connection = None # will cause an exception and retry next cycle
+ self.connection_status = 500
+ self.connection_response = 'Internal Server Error'
+ self.headers = {}
+ self.received_data = 'error accessing http server: '+str(e)
self.handler.rawserver.add_task(self.request_finished)
def request_finished(self):
"""Process the completed request."""
+ self.connection.close()
self.active = False
- if self.error is not None:
- self.errorcount += 1
- if self.received_data:
- self.errorcount = 0
- if not self._got_data():
- self.received_data = None
self.handler.download_complete(self, self.path, self.func,
- self.received_data, self.last_modified)
-
- def _got_data(self):
- """Process the returned data from the request.
-
- @rtype: C{boolean}
- @return: whether the data was good
-
- """
-
- if self.connection_status not in [200, 206]:
- self.errorcount += 1
- return False
- if self.cancelled:
- return False
- return True
-
+ (self.connection_status, self.connection_response,
+ self.headers, self.received_data))
+
class HTTPCache:
"""Manage an HTTP download cache.
@@ -208,12 +169,12 @@
print 'Starting a HttpCache downloader for:', 'http://'+'/'.join(path)
self.downloads.append(CacheRequest(self, path, func))
- def download_complete(self, d, path, func, data, last_modified):
+ def download_complete(self, d, path, func, r):
"""Remove a completed download from the list and process the data.
Once a download has been completed, remove the downloader from the
list and save the downloaded file in the file system. Then return the
- data to the callback function.
+ data to the callback function.
@type d: L{CacheRequest}
@param d: the cache request that is completed
@@ -221,10 +182,8 @@
@param path: the server and path that was downloaded
@type func: C{method}
@param func: the method to call with the data
- @type data: C{string}
- @param data: the downloaded data
- @type last_modified: C{string}
- @param last_modified: the Last-Modified HTTP header from the request
+ @type r: (C{int}, C{string}, C{dictionary}, C{string})
+ @param r: the HTTP status code, status message, headers, and downloaded data
"""
@@ -232,11 +191,8 @@
print 'HttpCache download completed for:', 'http://'+'/'.join(path)
self.downloads.remove(d)
- if data is not None:
- # Build the file name from the path list
- file = path[0]
- for i in path[1:]:
- file = join(file, i)
+ if r[0] in (200, 206):
+ file = self.get_filename(path)
# Create the directory for the new file
new_dir = split(file)[0]
@@ -245,42 +201,54 @@
# Write the new file
f = open(file, 'wb')
- f.write(data)
+ f.write(r[3])
f.close()
# Set the modified time (on error use current time which should work)
try:
- mtime = timegm(strptime(last_modified, '%a, %d %b %Y %H:%M:%S %Z'))
+ mtime = timegm(strptime(r[2]['last-modified'], '%a, %d %b %Y %H:%M:%S %Z'))
times = (mtime, mtime)
utime(file, times)
except:
pass
+ # Use the headers we want
+ mtime_string = strftime(time_format, gmtime(getmtime(file)))
+ headers = {'Server': VERSION, 'last-modified': mtime_string}
+ for k, v in r[2].items():
+ if k in ('last-modified', 'content-type'):
+ headers[k] = v
+
# Call the callback function
- func(path, data)
-
- def cache_get(self, path, uptodate = True):
+ func(path, (r[0], r[1], headers, r[3]))
+
+ def cache_get(self, path, uptodate = False, if_modified_time = ''):
"""Get the file from the cache.
+ Will respond with the following HTTP status codes:
+ - 200: the file was found in the cache and is up to date
+ - 304: the file is up to date, but is not needed
+ - 404: the file was not found in the cache
+ - 405: the file was found, but is stale, and needs to be refreshed
+
@type path: C{list} of C{string}
@param path: the server and path to download
@type uptodate: C{boolean}
- @param uptodate: whether to check the age of the file to see if it
- is still current (optional, defaults to True)
- @rtype: C{string}
- @return: the cached data, or None if the cached data was not found
- or is stale
-
- """
-
- # Build the file name
- file = path[0]
- for i in path[1:]:
- file = join(file, i)
-
- # Return None if the file isn't in the cache
+ @param uptodate: whether to check the age of the file on the server to
+ see if the cached one is still current (optional, defaults to False)
+ @type if_modified_time: C{string}
+ @param if_modified_time: the if-modified-since header from the request
+ (optional, defaults to not checking the if-modified-time)
+ @rtype: (C{int}, C{string}, C{dictionary}, C{string})
+ @return: the HTTP status code, status message, headers, and package data
+
+ """
+
+ file = self.get_filename(path)
+
+ # Check if the file isn't in the cache
if not exists(file):
- return None
+ return (404, 'Not Found', {'Server': VERSION, 'Content-Type': 'text/plain', 'Pragma': 'no-cache'}, alas)
if uptodate:
# Get the last modified time from the server
@@ -288,15 +256,79 @@
connection.request('HEAD', '/' + '/'.join(path[1:]), None, {'User-Agent': VERSION})
r = connection.getresponse()
last_modified = r.getheader('last-modified')
-
- # Check the server's time against the cached copy
- file_mtime = getmtime(file)
- server_mtime = timegm(strptime(last_modified, '%a, %d %b %Y %H:%M:%S %Z'))
- if server_mtime - file_mtime > 65:
- return None
-
+ connection.close()
+
+ # Check if the cached data is stale
+ if self.check_mtime(last_modified, file = file) > 0:
+ return (405, 'Method Not Allowed', {'Server': VERSION, 'Content-Type': 'text/plain', 'Pragma': 'no-cache'}, alas)
+
+ # Check if the request needs the data
+ if if_modified_time and self.check_mtime(if_modified_time, file = file) >= 0:
+ return (304, 'Not Modified', {'Server': VERSION, 'Pragma': 'no-cache'}, '')
+
# Read in the file and return the data
f = open(file, 'rb')
data = f.read()
f.close()
- return data
+ mtime_string = strftime(time_format, gmtime(getmtime(file)))
+
+ return (200, 'OK', {'Server': VERSION, 'Content-Type': 'text/plain', 'Last-Modified': mtime_string}, data)
+
+ def get_filename(self, path):
+ """Get the file name used for this path in the cache.
+
+ @type path: C{list} of C{string}
+ @param path: the server and path to download
+ @rtype: C{string}
+ @return: the file name
+
+ """
+
+ if not path:
+ return None
+
+ # Build the file name
+ file = path[0]
+ for i in path[1:]:
+ file = join(file, i)
+
+ return file
+
+ def check_mtime(self, http_mtime_string, path = [], file = '', server_mtime_string = ''):
+ """Check the modified time of a file in the cache against a server header string.
+
+ @type http_mtime_string: C{string}
+ @param http_mtime_string: the modified time from an HTTP header
+ @type path: C{list} of C{string}
+ @param path: the server and path to download
+ (optional, but one of file/path/server_mtime must be specified)
+ @type file: C{string}
+ @param file: the file name in the cache
+ (optional, but one of file/path/server_mtime must be specified)
+ @type server_mtime_string: C{string}
+ @param server_mtime_string: the last-modified time from the server's copy
+ (optional, but one of file/path/server_mtime must be specified)
+ @rtype: C{int}
+ @return: the number of seconds the header's mtime is ahead of the
+ file's mtime (or None if the file isn't in the cache)
+
+ """
+
+ assert path or file or server_mtime_string
+
+ if path:
+ file = self.get_filename(path)
+
+ if file:
+ # Return None if the file isn't in the cache
+ if not exists(file):
+ return None
+
+ # Check the server's time against the cached copy
+ server_mtime = getmtime(file)
+ else:
+ server_mtime = timegm(strptime(server_mtime_string, time_format))
+
+ http_mtime = timegm(strptime(http_mtime_string, time_format))
+
+ return http_mtime - server_mtime
More information about the Debtorrent-commits
mailing list