[Pkg-bazaar-commits] ./bzr/unstable r791: - import effbot.org http client
Martin Pool
mbp at sourcefrog.net
Fri Apr 10 08:21:03 UTC 2009
------------------------------------------------------------
revno: 791
committer: Martin Pool <mbp at sourcefrog.net>
timestamp: Mon 2005-06-27 11:39:26 +1000
message:
- import effbot.org http client
added:
effbot/
effbot/__init__.py
effbot/org/
effbot/org/__init__.py
effbot/org/gzip_consumer.py
effbot/org/http_client.py
effbot/org/http_manager.py
-------------- next part --------------
=== added directory 'effbot'
=== added file 'effbot/__init__.py'
--- a/effbot/__init__.py 1970-01-01 00:00:00 +0000
+++ b/effbot/__init__.py 2005-06-27 01:39:26 +0000
@@ -0,0 +1,1 @@
+# $Id: __init__.py 271 2004-10-09 10:50:59Z fredrik $
=== added directory 'effbot/org'
=== added file 'effbot/org/__init__.py'
--- a/effbot/org/__init__.py 1970-01-01 00:00:00 +0000
+++ b/effbot/org/__init__.py 2005-06-27 01:39:26 +0000
@@ -0,0 +1,1 @@
+# $Id: __init__.py 271 2004-10-09 10:50:59Z fredrik $
=== added file 'effbot/org/gzip_consumer.py'
--- a/effbot/org/gzip_consumer.py 1970-01-01 00:00:00 +0000
+++ b/effbot/org/gzip_consumer.py 2005-06-27 01:39:26 +0000
@@ -0,0 +1,60 @@
+# $Id: gzip_consumer.py 271 2004-10-09 10:50:59Z fredrik $
+# gzip consumer
+#
+# Copyright (c) 2001-2004 by Fredrik Lundh. All rights reserved.
+#
+
+##
+# Consumer wrapper for GZIP streams.
+
+class GzipConsumer:
+
+ def __init__(self, consumer):
+ self.__consumer = consumer
+ self.__decoder = None
+ self.__data = ""
+
+ def __getattr__(self, key):
+ return getattr(self.__consumer, key)
+
+ def feed(self, data):
+ if self.__decoder is None:
+ # check if we have a full gzip header
+ data = self.__data + data
+ try:
+ i = 10
+ flag = ord(data[3])
+ if flag & 4: # extra
+ x = ord(data[i]) + 256*ord(data[i+1])
+ i = i + 2 + x
+ if flag & 8: # filename
+ while ord(data[i]):
+ i = i + 1
+ i = i + 1
+ if flag & 16: # comment
+ while ord(data[i]):
+ i = i + 1
+ i = i + 1
+ if flag & 2: # crc
+ i = i + 2
+ if len(data) < i:
+ raise IndexError("not enough data")
+ if data[:3] != "\x1f\x8b\x08":
+ raise IOError("invalid gzip data")
+ data = data[i:]
+ except IndexError:
+ self.__data = data
+ return # need more data
+ import zlib
+ self.__data = ""
+ self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS)
+ data = self.__decoder.decompress(data)
+ if data:
+ self.__consumer.feed(data)
+
+ def close(self):
+ if self.__decoder:
+ data = self.__decoder.flush()
+ if data:
+ self.__consumer.feed(data)
+ self.__consumer.close()
=== added file 'effbot/org/http_client.py'
--- a/effbot/org/http_client.py 1970-01-01 00:00:00 +0000
+++ b/effbot/org/http_client.py 2005-06-27 01:39:26 +0000
@@ -0,0 +1,268 @@
+# $Id: http_client.py 271 2004-10-09 10:50:59Z fredrik $
+# a simple asynchronous http client (based on SimpleAsyncHTTP.py from
+# "Python Standard Library" by Fredrik Lundh, O'Reilly 2001)
+#
+# HTTP/1.1 and GZIP support added in January 2003 by Fredrik Lundh.
+#
+# changes:
+# 2004-08-26 fl unified http callback
+# 2004-10-09 fl factored out gzip_consumer support
+#
+# Copyright (c) 2001-2004 by Fredrik Lundh. All rights reserved.
+#
+
+import asyncore
+import socket, string, time, sys
+import StringIO
+import mimetools, urlparse, urllib
+
+try:
+ from gzip_consumer import GzipConsumer
+except ImportError:
+ pass
+
+##
+# Close connection. Request handlers can raise this exception to
+# indicate that the connection should be closed.
+
+class CloseConnection(Exception):
+ pass
+
+##
+# Redirect connection. Request handlers can raise this exception to
+# indicate that a new request should be issued.
+
+class Redirect(CloseConnection):
+ def __init__(self, location):
+ self.location = location
+
+##
+# Asynchronous HTTP/1.1 client.
+
+class async_http(asyncore.dispatcher_with_send):
+ # asynchronous http client
+
+ user_agent = "http_client.py 1.2 (http://effbot.org/zone)"
+ http_version = "1.1"
+
+ proxies = urllib.getproxies()
+
+ def __init__(self, uri, consumer, extra_headers=None):
+ asyncore.dispatcher_with_send.__init__(self)
+
+ # turn the uri into a valid request
+ scheme, host, path, params, query, fragment = urlparse.urlparse(uri)
+
+ # use origin host
+ self.host = host
+
+ # get proxy settings, if any
+ proxy = self.proxies.get(scheme)
+ if proxy:
+ scheme, host, x, x, x, x = urlparse.urlparse(proxy)
+
+ assert scheme == "http", "only supports HTTP requests (%s)" % scheme
+
+ if not path:
+ path = "/"
+ if params:
+ path = path + ";" + params
+ if query:
+ path = path + "?" + query
+ if proxy:
+ path = scheme + "://" + self.host + path
+
+ self.path = path
+
+ # get port number
+ try:
+ host, port = host.split(":", 1)
+ port = int(port)
+ except (TypeError, ValueError):
+ port = 80 # default port
+
+ self.consumer = consumer
+
+ self.status = None
+ self.header = None
+
+ self.bytes_in = 0
+ self.bytes_out = 0
+
+ self.content_type = None
+ self.content_length = None
+ self.content_encoding = None
+ self.transfer_encoding = None
+
+ self.data = ""
+
+ self.chunk_size = None
+
+ self.timestamp = time.time()
+
+ self.extra_headers = extra_headers
+
+ self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
+ try:
+ self.connect((host, port))
+ except socket.error:
+ self.consumer.http(0, self, sys.exc_info())
+
+ def handle_connect(self):
+ # connection succeeded
+
+ request = [
+ "GET %s HTTP/%s" % (self.path, self.http_version),
+ "Host: %s" % self.host,
+ ]
+
+ if GzipConsumer:
+ request.append("Accept-Encoding: gzip")
+
+ if self.extra_headers:
+ request.extend(self.extra_headers)
+
+ # make sure to include a user agent
+ for header in request:
+ if string.lower(header).startswith("user-agent:"):
+ break
+ else:
+ request.append("User-Agent: %s" % self.user_agent)
+
+ request = string.join(request, "\r\n") + "\r\n\r\n"
+
+ self.send(request)
+
+ self.bytes_out = self.bytes_out + len(request)
+
+ def handle_expt(self):
+ # connection failed (windows); notify consumer
+
+ if sys.platform == "win32":
+ self.close()
+ self.consumer.http(0, self)
+
+ def handle_read(self):
+ # handle incoming data
+
+ data = self.recv(2048)
+
+ self.data = self.data + data
+ self.bytes_in = self.bytes_in + len(data)
+
+ while self.data:
+
+ if not self.header:
+ # check if we've seen a full header
+
+ header = self.data.split("\r\n\r\n", 1)
+ if len(header) <= 1:
+ return
+ header, self.data = header
+
+ # parse header
+ fp = StringIO.StringIO(header)
+ self.status = fp.readline().split(" ", 2)
+ self.header = mimetools.Message(fp)
+
+ # get http headers
+ self.content_type = self.header.get("content-type")
+ try:
+ self.content_length = int(
+ self.header.get("content-length")
+ )
+ except (ValueError, TypeError):
+ self.content_length = None
+ self.transfer_encoding = self.header.get("transfer-encoding")
+ self.content_encoding = self.header.get("content-encoding")
+
+ if self.content_encoding == "gzip":
+ # FIXME: report error if GzipConsumer is not available
+ self.consumer = GzipConsumer(self.consumer)
+
+ try:
+ self.consumer.http(1, self)
+ except Redirect, v:
+ # redirect
+ if v.location:
+ do_request(
+ v.location, self.consumer, self.extra_headers
+ )
+ self.close()
+ return
+ except CloseConnection:
+ self.close()
+ return
+
+ if self.transfer_encoding == "chunked" and self.chunk_size is None:
+
+ # strip off leading whitespace
+ if self.data.startswith("\r\n"):
+ self.data = self.data[2:]
+
+ chunk_size = self.data.split("\r\n", 1)
+ if len(chunk_size) <= 1:
+ return
+ chunk_size, self.data = chunk_size
+
+ try:
+ self.chunk_size = int(chunk_size, 16)
+ if self.chunk_size <= 0:
+ raise ValueError
+ except ValueError:
+ return self.handle_close()
+
+ if not self.data:
+ return
+
+ data = self.data
+ self.data = ""
+
+ chunk_size = self.chunk_size or len(data)
+
+ if chunk_size < len(data):
+ self.data = data[chunk_size:]
+ data = data[:chunk_size]
+ self.chunk_size = None
+ else:
+ self.chunk_size = chunk_size - len(data)
+ if self.chunk_size <= 0:
+ self.chunk_size = None
+
+ if data:
+ self.consumer.feed(data)
+
+ if self.content_length:
+ self.content_length -= chunk_size
+ if self.content_length <= 0:
+ return self.handle_close()
+
+ def handle_close(self):
+ self.consumer.close()
+ self.close()
+
+ def handle_error(self):
+ self.consumer.http(0, self, sys.exc_info())
+ self.close()
+
+def do_request(uri, consumer, extra_headers=None):
+
+ return async_http(uri, consumer, extra_headers)
+
+if __name__ == "__main__":
+ class dummy_consumer:
+ def feed(self, data):
+ # print "feed", repr(data)
+ print "feed", repr(data[:20]), repr(data[-20:]), len(data)
+ def close(self):
+ print "close"
+ def http(self, ok, connection, **args):
+ print ok, connection, args
+ print "status", connection.status
+ print "header", connection.header
+ try:
+ url = sys.argv[1]
+ except IndexError:
+ url = "http://www.cnn.com/"
+ do_request(url, dummy_consumer())
+ asyncore.loop()
=== added file 'effbot/org/http_manager.py'
--- a/effbot/org/http_manager.py 1970-01-01 00:00:00 +0000
+++ b/effbot/org/http_manager.py 2005-06-27 01:39:26 +0000
@@ -0,0 +1,67 @@
+# $Id: http_manager.py 270 2004-10-09 10:38:54Z fredrik $
+# effnews http
+#
+# manage a set of http clients
+#
+# Copyright (c) 2001-2004 by Fredrik Lundh. All rights reserved.
+#
+
+import asyncore, time
+import http_client
+
+class http_manager:
+
+ max_connections = 8
+ max_size = 1000000
+ max_time = 60
+
+ def __init__(self):
+ self.queue = []
+
+ def request(self, uri, consumer, extra_headers=None):
+ self.queue.append((uri, consumer, extra_headers))
+
+ def priority_request(self, uri, consumer, extra_headers=None):
+ self.queue.insert(0, (uri, consumer, extra_headers))
+
+ def purge(self):
+ for channel in asyncore.socket_map.values():
+ channel.close()
+ del self.queue[:]
+
+ def prioritize(self, priority_uri):
+ i = 0
+ for uri, consumer, extra_headers in self.queue:
+ if uri == priority_uri:
+ del self.queue[i]
+ self.priority_request(uri, consumer, extra_headers)
+ return
+ i = i + 1
+
+ def poll(self, timeout=0.1):
+ # sanity checks
+ now = time.time()
+ for channel in asyncore.socket_map.values():
+ if channel.bytes_in > self.max_size:
+ channel.close() # too much data
+ try:
+ channel.consumer.http(
+ 0, channel, ("HTTPManager", "too much data", None)
+ )
+ except:
+ pass
+ if channel.timestamp and now - channel.timestamp > self.max_time:
+ channel.close() # too slow
+ try:
+ channel.consumer.http(
+ 0, channel, ("HTTPManager", "timeout", None)
+ )
+ except:
+ pass
+ # activate up to max_connections channels
+ while self.queue and len(asyncore.socket_map) < self.max_connections:
+ http_client.do_request(*self.queue.pop(0))
+ # keep the network running
+ asyncore.poll(timeout)
+ # return non-zero if we should keep on polling
+ return len(self.queue) or len(asyncore.socket_map)
More information about the Pkg-bazaar-commits
mailing list