r113 - in /debtorrent/trunk/DebTorrent: BT1/AptListener.py HTTPCache.py

camrdale-guest at users.alioth.debian.org camrdale-guest at users.alioth.debian.org
Sat Jun 16 06:06:55 UTC 2007


Author: camrdale-guest
Date: Sat Jun 16 06:06:55 2007
New Revision: 113

URL: http://svn.debian.org/wsvn/debtorrent/?sc=1&rev=113
Log:
Returns to APT more HTTP status codes and sets better headers.

Modified:
    debtorrent/trunk/DebTorrent/BT1/AptListener.py
    debtorrent/trunk/DebTorrent/HTTPCache.py

Modified: debtorrent/trunk/DebTorrent/BT1/AptListener.py
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/trunk/DebTorrent/BT1/AptListener.py?rev=113&op=diff
==============================================================================
--- debtorrent/trunk/DebTorrent/BT1/AptListener.py (original)
+++ debtorrent/trunk/DebTorrent/BT1/AptListener.py Sat Jun 16 06:06:55 2007
@@ -467,13 +467,15 @@
         return (200, 'OK', {'Server': VERSION, 'Content-Type': 'text/html; charset=iso-8859-1'}, """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n<html><head><title>Meow</title>\n</head>\n<body style="color: rgb(255, 255, 255); background-color: rgb(0, 0, 0);">\n<div><big style="font-weight: bold;"><big><big><span style="font-family: arial,helvetica,sans-serif;">I&nbsp;IZ&nbsp;TAKIN&nbsp;BRAKE</span></big></big></big><br></div>\n<pre><b><tt>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; .-o=o-.<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; ,&nbsp; /=o=o=o=\ .--.<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; _|\|=o=O=o=O=|&nbsp;&nbsp;&nbsp; \<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; __.'&nbsp; a`\=o=o=o=(`\&nbsp;&nbsp; /<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; '.&nbsp;&nbsp; a 4/`|.-""'`\ \ ;'`)&nbsp;&nbsp; .---.<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \&nbsp;&nbsp; .'&nbsp; /&nbsp;&nbsp; .--'&nbsp; |_.'&nbsp;&nbsp; / .-._)<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; `)&nbsp; _.'&nbsp;&nbsp; /&nbsp;&nbsp;&nbsp;&nbsp; /`-.__.' /<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; `'-.____;&nbsp;&nbsp;&nbsp;&nbsp; /'-.___.-'<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; `\"""`</tt></b></pre>\n<div><big style="font-weight: bold;"><big><big><span style="font-family: arial,helvetica,sans-serif;">FRM&nbsp;GETIN&nbsp;UR&nbsp;PACKAGES</span></big></big></big><br></div>\n</body>\n</html>""")
 
 
-    def get_cached(self, connection, path):
+    def get_cached(self, connection, path, headers):
         """Proxy the (possibly cached) download of a file from a mirror.
         
         @type connection: L{DebTorrent.HTTPHandler.HTTPConnection}
         @param connection: the conection the request came in on
         @type path: C{list} of C{string}
         @param path: the path of the file to download, starting with the mirror name
+        @type headers: C{dictionary}
+        @param headers: the headers from the request
         @rtype: (C{int}, C{string}, C{dictionary}, C{string})
         @return: the HTTP status code, status message, headers, and downloaded file
             (or None if the file is being downloaded)
@@ -487,11 +489,11 @@
                 uptodate = False
 
             # First check the cache for the file
-            data = self.Cache.cache_get(path, uptodate)
+            r = self.Cache.cache_get(path, uptodate, headers.get('if-modified-since', ''))
             # TODO: watch out for getting cached deb's that haven't yet downloaded (with saveas_style 2)
-
+            
             # If the cache doesn't have it
-            if data is None:
+            if r[0] not in (200, 304):
                 # Get Debs from the debtorrent download, others are straight download
                 if path[-1][-4:] == '.deb':
                     return self.get_package(connection, path)
@@ -502,9 +504,15 @@
                     return None
             
             if path[-1] in ('Packages', 'Packages.gz', 'Packages.bz2'):
-                self.got_Packages(path, data)
-
-            return (200, 'OK', {'Server': VERSION, 'Content-Type': 'text/plain'}, data)
+                # TODO: check if the torrent is already running before this
+                if r[0] == 304:
+                    # Oops, we do need the cached file after all to start the torrent
+                    r2 = self.Cache.cache_get(path)
+                    self.got_Packages(path, r2[3])
+                else:
+                    self.got_Packages(path, r[3])                    
+
+            return r
         
         except IOError, e:
             try:
@@ -514,13 +522,15 @@
                 msg = 'Unknown error occurred'
             return (status, 'Not Found', {'Server': VERSION, 'Content-Type': 'text/plain', 'Pragma': 'no-cache'}, msg)
             
-    def get_cached_callback(self, path, data):
+    def get_cached_callback(self, path, r):
         """Return the newly cached file to the waiting connection.
         
         @type path: C{list} of C{string}
         @param path: the path of the file to download, starting with the mirror name
         @type data: C{string}
         @param data: the downloaded newly cached file
+        @type r: (C{int}, C{string}, C{dictionary}, C{string})
+        @param r: the HTTP status code, status message, headers, and cached data
         
         """
 
@@ -533,18 +543,15 @@
             return
 
         # If it's a torrent file, start it
-        if data is not None and path[-1] in ('Packages', 'Packages.gz', 'Packages.bz2'):
-            self.got_Packages(path, data)
+        if r[0] == 200 and path[-1] in ('Packages', 'Packages.gz', 'Packages.bz2'):
+            self.got_Packages(path, r[3])
 
         for connection in connections:
             # Check to make sure the requester is still waiting
             if connection.closed:
                 continue
             
-            if data is None:
-                connection.answer((404, 'Not Found', {'Server': VERSION, 'Content-Type': 'text/plain', 'Pragma': 'no-cache'}, alas))
-            else:
-                connection.answer((200, 'OK', {'Server': VERSION, 'Content-Type': 'text/plain'}, data))
+            connection.answer(r)
             
     def get_package(self, connection, path):
         """Download a package file from a torrent.
@@ -672,6 +679,7 @@
         
         infohash = sha(bencode(response['info'])).digest()
         
+        # TODO: cleanup, all these aren't needed
         a = {}
         a['path'] = '/'.join(path)
         a['file'] = name
@@ -790,7 +798,7 @@
             if 'Packages.diff' in path:
                 return (404, 'Not Found', {'Server': VERSION, 'Content-Type': 'text/plain', 'Pragma': 'no-cache'}, alas)
             
-            return self.get_cached(connection, path)
+            return self.get_cached(connection, path, headers)
             
         except ValueError, e:
             return (400, 'Bad Request', {'Server': VERSION, 'Content-Type': 'text/plain'}, 

Modified: debtorrent/trunk/DebTorrent/HTTPCache.py
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/trunk/DebTorrent/HTTPCache.py?rev=113&op=diff
==============================================================================
--- debtorrent/trunk/DebTorrent/HTTPCache.py (original)
+++ debtorrent/trunk/DebTorrent/HTTPCache.py Sat Jun 16 06:06:55 2007
@@ -7,16 +7,18 @@
 
 @type VERSION: C{string}
 @var VERSION: the UserAgent identifier sent to all sites
+@type alas: C{string}
+@var alas: the message to send when the data is not found
 
 """
 
-from httplib import HTTPConnection, BadStatusLine
+from httplib import HTTPConnection
 from threading import Thread
 from traceback import print_exc
 from DebTorrent.__init__ import product_name,version_short
 from os.path import join, split, getmtime, getsize, exists
 from os import utime, makedirs, listdir
-from time import strptime, gmtime
+from time import strftime, strptime, gmtime
 from calendar import timegm
 try:
     True
@@ -26,7 +28,9 @@
 
 DEBUG = True
 
+time_format = '%a, %d %b %Y %H:%M:%S %Z'
 VERSION = product_name+'/'+version_short
+alas = 'your file may exist elsewhere in the universe\nbut alas, not here\n'
 
 class CacheRequest:
     """Download a file needed for the HTTP download cache.
@@ -44,23 +48,16 @@
     @type connection: C{HTTPConnection}
     @ivar connection: the connection to the HTTP server
     @type headers: C{dictionary}
-    @ivar headres: the HTTP headers to send in the request
-    @type error: C{string}
-    @ivar error: the error received from the server
-    @type errorcount: C{int}
-    @ivar errorcount: the number of download errors that have occurred since
-        the last successful download from the site
+    @ivar headres: the HTTP headers to send in the request, and the headers
+        returned by the response
     @type active: C{boolean}
     @ivar active: whether there is a download underway
-    @type cancelled: C{boolean}
-    @ivar cancelled: whether the download has been cancelled
     @type received_data: C{string}
-    @ivar received_data: the data returned from the most recent request
+    @ivar received_data: the data returned from the server
     @type connection_status: C{int}
-    @ivar connection_status: the status code returned by the server for the 
-        most recent request
-    @type last_modified: C{string}
-    @ivar last_modified: the Last-Modified HTTP header from the request
+    @ivar connection_status: the status code returned by the server
+    @type connection_response: C{string}
+    @ivar connection_status: the status message returned by the server
     
     """
     
@@ -89,10 +86,7 @@
             return
         
         self.headers = {'User-Agent': VERSION}
-        self.error = None
-        self.errorcount = 0
         self.active = False
-        self.cancelled = False
         if DEBUG:
             print 'CacheRequest: downloading ', self.url
         rq = Thread(target = self._request)
@@ -106,72 +100,39 @@
         import encodings.punycode
         import encodings.idna
         
-        self.error = None
-        self.received_data = None
         try:
             if DEBUG:
                 print 'CacheRequest: sending request'
                 print 'GET', self.url, self.headers
             self.connection.request('GET',self.url, None, self.headers)
             
-            # Check for closed persistent connection due to server timeout
-            try:
-                r = self.connection.getresponse()
-            except BadStatusLine:
-                # Reopen the connection to get a new socket
-                self.connection.close()
-                self.connection.connect()
-                self.connection.request('GET',self.url, None, self.headers)
-                r = self.connection.getresponse()
+            r = self.connection.getresponse()
                 
             if DEBUG:
                 print 'CacheRequest: got response'
                 print r.status, r.reason, r.getheaders()
             self.connection_status = r.status
-            self.last_modified = r.getheader('last-modified')
+            self.connection_response = r.reason
+            self.headers = dict(r.getheaders())
             self.received_data = r.read()
         except Exception, e:
             if DEBUG:
                 print 'error accessing http server: '+str(e)
                 print_exc()
-            self.error = 'error accessing http server: '+str(e)
-            try:
-                self.connection.close()
-            except:
-                pass
-            try:
-                self.connection = HTTPConnection(self.server)
-            except:
-                self.connection = None  # will cause an exception and retry next cycle
+            self.connection_status = 500
+            self.connection_response = 'Internal Server Error'
+            self.headers = {}
+            self.received_data = 'error accessing http server: '+str(e)
         self.handler.rawserver.add_task(self.request_finished)
 
     def request_finished(self):
         """Process the completed request."""
+        self.connection.close()
         self.active = False
-        if self.error is not None:
-            self.errorcount += 1
-        if self.received_data:
-            self.errorcount = 0
-            if not self._got_data():
-                self.received_data = None
         self.handler.download_complete(self, self.path, self.func, 
-                                       self.received_data, self.last_modified)
-
-    def _got_data(self):
-        """Process the returned data from the request.
-        
-        @rtype: C{boolean}
-        @return: whether the data was good
-        
-        """
-        
-        if self.connection_status not in [200, 206]:
-            self.errorcount += 1
-            return False
-        if self.cancelled:
-            return False
-        return True
-    
+                   (self.connection_status, self.connection_response, 
+                    self.headers, self.received_data))
+
 
 class HTTPCache:
     """Manage an HTTP download cache.
@@ -208,12 +169,12 @@
             print 'Starting a HttpCache downloader for:', 'http://'+'/'.join(path)
         self.downloads.append(CacheRequest(self, path, func))
 
-    def download_complete(self, d, path, func, data, last_modified):
+    def download_complete(self, d, path, func, r):
         """Remove a completed download from the list and process the data.
         
         Once a download has been completed, remove the downloader from the 
         list and save the downloaded file in the file system. Then return the
-        data to the callback function.
+        data to the callback function. 
         
         @type d: L{CacheRequest}
         @param d: the cache request that is completed
@@ -221,10 +182,8 @@
         @param path: the server and path that was downloaded
         @type func: C{method}
         @param func: the method to call with the data
-        @type data: C{string}
-        @param data: the downloaded data
-        @type last_modified: C{string}
-        @param last_modified: the Last-Modified HTTP header from the request
+        @type r: (C{int}, C{string}, C{dictionary}, C{string})
+        @param r: the HTTP status code, status message, headers, and downloaded data
         
         """
         
@@ -232,11 +191,8 @@
             print 'HttpCache download completed for:', 'http://'+'/'.join(path)
         self.downloads.remove(d)
 
-        if data is not None:
-            # Build the file name from the path list
-            file = path[0]
-            for i in path[1:]:
-                file = join(file, i)
+        if r[0] in (200, 206):
+            file = self.get_filename(path)
             
             # Create the directory for the new file
             new_dir = split(file)[0]
@@ -245,42 +201,54 @@
             
             # Write the new file
             f = open(file, 'wb')
-            f.write(data)
+            f.write(r[3])
             f.close()
             
             # Set the modified time (on error use current time which should work)
             try:
-                mtime = timegm(strptime(last_modified, '%a, %d %b %Y %H:%M:%S %Z'))
+                mtime = timegm(strptime(r[2]['last-modified'], '%a, %d %b %Y %H:%M:%S %Z'))
                 times = (mtime, mtime)
                 utime(file, times)
             except:
                 pass
 
+        # Use the headers we want
+        mtime_string = strftime(time_format, gmtime(getmtime(file)))
+        headers = {'Server': VERSION, 'last-modified': mtime_string}
+        for k, v in r[2].items():
+            if k in ('last-modified', 'content-type'):
+                headers[k] = v
+        
         # Call the callback function
-        func(path, data)
-
-    def cache_get(self, path, uptodate = True):
+        func(path, (r[0], r[1], headers, r[3]))
+
+    def cache_get(self, path, uptodate = False, if_modified_time = ''):
         """Get the file from the cache.
         
+        Will respond with the following HTTP status codes:
+            - 200: the file was found in the cache and is up to date
+            - 304: the file is up to date, but is not needed
+            - 404: the file was not found in the cache
+            - 405: the file was found, but is stale, and needs to be refreshed
+        
         @type path: C{list} of C{string}
         @param path: the server and path to download
         @type uptodate: C{boolean}
-        @param uptodate: whether to check the age of the file to see if it 
-            is still current (optional, defaults to True)
-        @rtype: C{string}
-        @return: the cached data, or None if the cached data was not found 
-            or is stale
-        
-        """
-        
-        # Build the file name
-        file = path[0]
-        for i in path[1:]:
-            file = join(file, i)
-            
-        # Return None if the file isn't in the cache
+        @param uptodate: whether to check the age of the file on the server to 
+            see if the cached one is still current (optional, defaults to False)
+        @type if_modified_time: C{string}
+        @param if_modified_time: the if-modified-since header from the request
+            (optional, defaults to not checking the if-modified-time)
+        @rtype: (C{int}, C{string}, C{dictionary}, C{string})
+        @return: the HTTP status code, status message, headers, and package data
+        
+        """
+        
+        file = self.get_filename(path)
+            
+        # Check if the file isn't in the cache
         if not exists(file):
-            return None
+            return (404, 'Not Found', {'Server': VERSION, 'Content-Type': 'text/plain', 'Pragma': 'no-cache'}, alas)
         
         if uptodate:
             # Get the last modified time from the server
@@ -288,15 +256,79 @@
             connection.request('HEAD', '/' + '/'.join(path[1:]), None, {'User-Agent': VERSION})
             r = connection.getresponse()
             last_modified = r.getheader('last-modified')
-            
-            # Check the server's time against the cached copy
-            file_mtime = getmtime(file)
-            server_mtime = timegm(strptime(last_modified, '%a, %d %b %Y %H:%M:%S %Z'))
-            if server_mtime - file_mtime > 65:
-                return None
-
+            connection.close()
+
+            # Check if the cached data is stale
+            if self.check_mtime(last_modified, file = file) > 0:
+                return (405, 'Method Not Allowed', {'Server': VERSION, 'Content-Type': 'text/plain', 'Pragma': 'no-cache'}, alas)
+
+        # Check if the request needs the data
+        if if_modified_time and self.check_mtime(if_modified_time, file = file) >= 0:
+            return (304, 'Not Modified', {'Server': VERSION, 'Pragma': 'no-cache'}, '')
+            
         # Read in the file and return the data
         f = open(file, 'rb')
         data = f.read()
         f.close()
-        return data
+        mtime_string = strftime(time_format, gmtime(getmtime(file)))
+        
+        return (200, 'OK', {'Server': VERSION, 'Content-Type': 'text/plain', 'Last-Modified': mtime_string}, data)
+
+    def get_filename(self, path):
+        """Get the file name used for this path in the cache.
+        
+        @type path: C{list} of C{string}
+        @param path: the server and path to download
+        @rtype: C{string}
+        @return: the file name
+        
+        """
+        
+        if not path:
+            return None
+        
+        # Build the file name
+        file = path[0]
+        for i in path[1:]:
+            file = join(file, i)
+
+        return file
+
+    def check_mtime(self, http_mtime_string, path = [], file = '', server_mtime_string = ''):
+        """Check the modified time of a file in the cache against a server header string.
+        
+        @type http_mtime_string: C{string}
+        @param http_mtime_string: the modified time from an HTTP header
+        @type path: C{list} of C{string}
+        @param path: the server and path to download
+            (optional, but one of file/path/server_mtime must be specified)
+        @type file: C{string}
+        @param file: the file name in the cache 
+            (optional, but one of file/path/server_mtime must be specified)
+        @type server_mtime_string: C{string}
+        @param server_mtime_string: the last-modified time from the server's copy
+            (optional, but one of file/path/server_mtime must be specified)
+        @rtype: C{int}
+        @return: the number of seconds the header's mtime is ahead of the 
+            file's mtime (or None if the file isn't in the cache)
+        
+        """
+        
+        assert path or file or server_mtime_string
+
+        if path:
+            file = self.get_filename(path)
+            
+        if file:
+            # Return None if the file isn't in the cache
+            if not exists(file):
+                return None
+            
+            # Check the server's time against the cached copy
+            server_mtime = getmtime(file)
+        else:
+            server_mtime = timegm(strptime(server_mtime_string, time_format))
+
+        http_mtime = timegm(strptime(http_mtime_string, time_format))
+
+        return http_mtime - server_mtime




More information about the Debtorrent-commits mailing list