[Pkg-bazaar-commits] ./bzr/unstable r791: - import effbot.org http client

Martin Pool mbp at sourcefrog.net
Fri Apr 10 08:21:03 UTC 2009


------------------------------------------------------------
revno: 791
committer: Martin Pool <mbp at sourcefrog.net>
timestamp: Mon 2005-06-27 11:39:26 +1000
message:
  - import effbot.org http client
added:
  effbot/
  effbot/__init__.py
  effbot/org/
  effbot/org/__init__.py
  effbot/org/gzip_consumer.py
  effbot/org/http_client.py
  effbot/org/http_manager.py
=== added directory 'effbot'
=== added file 'effbot/__init__.py'
--- a/effbot/__init__.py	1970-01-01 00:00:00 +0000
+++ b/effbot/__init__.py	2005-06-27 01:39:26 +0000
@@ -0,0 +1,1 @@
+# $Id: __init__.py 271 2004-10-09 10:50:59Z fredrik $

=== added directory 'effbot/org'
=== added file 'effbot/org/__init__.py'
--- a/effbot/org/__init__.py	1970-01-01 00:00:00 +0000
+++ b/effbot/org/__init__.py	2005-06-27 01:39:26 +0000
@@ -0,0 +1,1 @@
+# $Id: __init__.py 271 2004-10-09 10:50:59Z fredrik $

=== added file 'effbot/org/gzip_consumer.py'
--- a/effbot/org/gzip_consumer.py	1970-01-01 00:00:00 +0000
+++ b/effbot/org/gzip_consumer.py	2005-06-27 01:39:26 +0000
@@ -0,0 +1,60 @@
+# $Id: gzip_consumer.py 271 2004-10-09 10:50:59Z fredrik $
+# gzip consumer
+#
+# Copyright (c) 2001-2004 by Fredrik Lundh.  All rights reserved.
+#
+
+##
+# Consumer wrapper for GZIP streams.
+
+class GzipConsumer:
+
+    def __init__(self, consumer):
+        self.__consumer = consumer
+        self.__decoder = None
+        self.__data = ""
+
+    def __getattr__(self, key):
+        return getattr(self.__consumer, key)
+
+    def feed(self, data):
+        if self.__decoder is None:
+            # check if we have a full gzip header
+            data = self.__data + data
+            try:
+                i = 10
+                flag = ord(data[3])
+                if flag & 4: # extra
+                    x = ord(data[i]) + 256*ord(data[i+1])
+                    i = i + 2 + x
+                if flag & 8: # filename
+                    while ord(data[i]):
+                        i = i + 1
+                    i = i + 1
+                if flag & 16: # comment
+                    while ord(data[i]):
+                        i = i + 1
+                    i = i + 1
+                if flag & 2: # header crc (FHCRC, two bytes)
+                    i = i + 2
+                if len(data) < i:
+                    raise IndexError("not enough data")
+                if data[:3] != "\x1f\x8b\x08":
+                    raise IOError("invalid gzip data")
+                data = data[i:]
+            except IndexError:
+                self.__data = data
+                return # need more data
+            import zlib
+            self.__data = ""
+            self.__decoder = zlib.decompressobj(-zlib.MAX_WBITS)
+        data = self.__decoder.decompress(data)
+        if data:
+            self.__consumer.feed(data)
+
+    def close(self):
+        if self.__decoder:
+            data = self.__decoder.flush()
+            if data:
+                self.__consumer.feed(data)
+        self.__consumer.close()
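
As a usage note: a minimal sketch (assuming Python 2, which this code
targets) of wrapping an ordinary consumer in GzipConsumer. Compressed
bytes go in through feed(); decompressed bytes come out on the wrapped
consumer. The echo_consumer class and the in-memory gzip stream are
hypothetical, built just for illustration.

    import gzip, StringIO
    from effbot.org.gzip_consumer import GzipConsumer

    class echo_consumer:
        # hypothetical sink: just prints whatever it receives
        def feed(self, data):
            print "feed", repr(data)
        def close(self):
            print "close"

    # build a gzip stream in memory to stand in for network data
    raw = StringIO.StringIO()
    f = gzip.GzipFile(fileobj=raw, mode="wb")
    f.write("hello, world")
    f.close()

    consumer = GzipConsumer(echo_consumer())
    consumer.feed(raw.getvalue())   # header is parsed, payload decoded
    consumer.close()                # flushes any buffered output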

=== added file 'effbot/org/http_client.py'
--- a/effbot/org/http_client.py	1970-01-01 00:00:00 +0000
+++ b/effbot/org/http_client.py	2005-06-27 01:39:26 +0000
@@ -0,0 +1,268 @@
+# $Id: http_client.py 271 2004-10-09 10:50:59Z fredrik $
+# a simple asynchronous http client (based on SimpleAsyncHTTP.py from
+# "Python Standard Library" by Fredrik Lundh, O'Reilly 2001)
+#
+# HTTP/1.1 and GZIP support added in January 2003 by Fredrik Lundh.
+#
+# changes:
+# 2004-08-26 fl   unified http callback
+# 2004-10-09 fl   factored out gzip_consumer support
+#
+# Copyright (c) 2001-2004 by Fredrik Lundh.  All rights reserved.
+#
+
+import asyncore
+import socket, string, time, sys
+import StringIO
+import mimetools, urlparse, urllib
+
+try:
+    from gzip_consumer import GzipConsumer
+except ImportError:
+    GzipConsumer = None # so the GzipConsumer checks below don't raise NameError
+
+##
+# Close connection.   Request handlers can raise this exception to
+# indicate that the connection should be closed.
+
+class CloseConnection(Exception):
+    pass
+
+##
+# Redirect connection.  Request handlers can raise this exception to
+# indicate that a new request should be issued.
+
+class Redirect(CloseConnection):
+    def __init__(self, location):
+        self.location = location
+
+##
+# Asynchronous HTTP/1.1 client.
+
+class async_http(asyncore.dispatcher_with_send):
+    # asynchronous http client
+
+    user_agent = "http_client.py 1.2 (http://effbot.org/zone)"
+    http_version = "1.1"
+
+    proxies = urllib.getproxies()
+
+    def __init__(self, uri, consumer, extra_headers=None):
+        asyncore.dispatcher_with_send.__init__(self)
+
+        # turn the uri into a valid request
+        scheme, host, path, params, query, fragment = urlparse.urlparse(uri)
+
+        # use origin host
+        self.host = host
+
+        # get proxy settings, if any
+        proxy = self.proxies.get(scheme)
+        if proxy:
+            scheme, host, x, x, x, x = urlparse.urlparse(proxy)
+
+        assert scheme == "http", "only supports HTTP requests (%s)" % scheme
+
+        if not path:
+            path = "/"
+        if params:
+            path = path + ";" + params
+        if query:
+            path = path + "?" + query
+        if proxy:
+            path = scheme + "://" + self.host + path
+
+        self.path = path
+
+        # get port number
+        try:
+            host, port = host.split(":", 1)
+            port = int(port)
+        except (TypeError, ValueError):
+            port = 80 # default port
+
+        self.consumer = consumer
+
+        self.status = None
+        self.header = None
+
+        self.bytes_in = 0
+        self.bytes_out = 0
+
+        self.content_type = None
+        self.content_length = None
+        self.content_encoding = None
+        self.transfer_encoding = None
+
+        self.data = ""
+
+        self.chunk_size = None
+
+        self.timestamp = time.time()
+
+        self.extra_headers = extra_headers
+
+        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
+        try:
+            self.connect((host, port))
+        except socket.error:
+            self.consumer.http(0, self, sys.exc_info())
+
+    def handle_connect(self):
+        # connection succeeded
+
+        request = [
+            "GET %s HTTP/%s" % (self.path, self.http_version),
+            "Host: %s" % self.host,
+            ]
+
+        if GzipConsumer:
+            request.append("Accept-Encoding: gzip")
+
+        if self.extra_headers:
+            request.extend(self.extra_headers)
+
+        # make sure to include a user agent
+        for header in request:
+            if string.lower(header).startswith("user-agent:"):
+                break
+        else:
+            request.append("User-Agent: %s" % self.user_agent)
+
+        request = string.join(request, "\r\n") + "\r\n\r\n"
+
+        self.send(request)
+
+        self.bytes_out = self.bytes_out + len(request)
+
+    def handle_expt(self):
+        # connection failed (windows); notify consumer
+
+        if sys.platform == "win32":
+            self.close()
+            self.consumer.http(0, self)
+
+    def handle_read(self):
+        # handle incoming data
+
+        data = self.recv(2048)
+
+        self.data = self.data + data
+        self.bytes_in = self.bytes_in + len(data)
+
+        while self.data:
+
+            if not self.header:
+                # check if we've seen a full header
+
+                header = self.data.split("\r\n\r\n", 1)
+                if len(header) <= 1:
+                    return
+                header, self.data = header
+
+                # parse header
+                fp = StringIO.StringIO(header)
+                self.status = fp.readline().split(" ", 2)
+                self.header = mimetools.Message(fp)
+
+                # get http headers
+                self.content_type = self.header.get("content-type")
+                try:
+                    self.content_length = int(
+                        self.header.get("content-length")
+                        )
+                except (ValueError, TypeError):
+                    self.content_length = None
+                self.transfer_encoding = self.header.get("transfer-encoding")
+                self.content_encoding = self.header.get("content-encoding")
+
+                if self.content_encoding == "gzip":
+                    # FIXME: report error if GzipConsumer is not available
+                    self.consumer = GzipConsumer(self.consumer)
+
+                try:
+                    self.consumer.http(1, self)
+                except Redirect, v:
+                    # redirect
+                    if v.location:
+                        do_request(
+                            v.location, self.consumer, self.extra_headers
+                            )
+                    self.close()
+                    return
+                except CloseConnection:
+                    self.close()
+                    return
+
+            if self.transfer_encoding == "chunked" and self.chunk_size is None:
+
+                # strip off leading whitespace
+                if self.data.startswith("\r\n"):
+                    self.data = self.data[2:]
+
+                chunk_size = self.data.split("\r\n", 1)
+                if len(chunk_size) <= 1:
+                    return
+                chunk_size, self.data = chunk_size
+
+                try:
+                    self.chunk_size = int(chunk_size, 16)
+                    if self.chunk_size <= 0:
+                        raise ValueError
+                except ValueError:
+                    return self.handle_close()
+
+            if not self.data:
+                return
+
+            data = self.data
+            self.data = ""
+
+            chunk_size = self.chunk_size or len(data)
+
+            if chunk_size < len(data):
+                self.data = data[chunk_size:]
+                data = data[:chunk_size]
+                self.chunk_size = None
+            else:
+                self.chunk_size = chunk_size - len(data)
+                if self.chunk_size <= 0:
+                    self.chunk_size = None
+
+            if data:
+                self.consumer.feed(data)
+
+            if self.content_length:
+                self.content_length -= chunk_size
+                if self.content_length <= 0:
+                    return self.handle_close()
+
+    def handle_close(self):
+        self.consumer.close()
+        self.close()
+
+    def handle_error(self):
+        self.consumer.http(0, self, sys.exc_info())
+        self.close()
+
+def do_request(uri, consumer, extra_headers=None):
+
+    return async_http(uri, consumer, extra_headers)
+
+if __name__ == "__main__":
+    class dummy_consumer:
+        def feed(self, data):
+            # print "feed", repr(data)
+            print "feed", repr(data[:20]), repr(data[-20:]), len(data)
+        def close(self):
+            print "close"
+        def http(self, ok, connection, *args):
+            print ok, connection, args
+            print "status", connection.status
+            print "header", connection.header
+    try:
+        url = sys.argv[1]
+    except IndexError:
+        url = "http://www.cnn.com/"
+    do_request(url, dummy_consumer())
+    asyncore.loop()
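
A usage note on the unified http callback: http(1, connection) fires
once the response header has been parsed, and raising Redirect from it
makes the client close this connection and re-issue the request at the
new location. A minimal sketch (again assuming Python 2); the
follow_redirects class is hypothetical, written just for this example.

    import sys, asyncore
    from effbot.org import http_client

    class follow_redirects:
        # hypothetical consumer that chases 301/302 responses
        def feed(self, data):
            sys.stdout.write(data)
        def close(self):
            print "done"
        def http(self, ok, connection, *args):
            # status is the parsed response line, e.g.
            # ["HTTP/1.1", "302", "Found\r\n"]
            if ok and connection.status[1] in ("301", "302"):
                raise http_client.Redirect(
                    connection.header.get("location")
                    )

    http_client.do_request("http://www.cnn.com/", follow_redirects())
    asyncore.loop()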

=== added file 'effbot/org/http_manager.py'
--- a/effbot/org/http_manager.py	1970-01-01 00:00:00 +0000
+++ b/effbot/org/http_manager.py	2005-06-27 01:39:26 +0000
@@ -0,0 +1,67 @@
+# $Id: http_manager.py 270 2004-10-09 10:38:54Z fredrik $
+# effnews http
+#
+# manage a set of http clients
+#
+# Copyright (c) 2001-2004 by Fredrik Lundh.  All rights reserved.
+#
+
+import asyncore, time
+import http_client
+
+class http_manager:
+
+    max_connections = 8
+    max_size = 1000000
+    max_time = 60
+
+    def __init__(self):
+        self.queue = []
+
+    def request(self, uri, consumer, extra_headers=None):
+        self.queue.append((uri, consumer, extra_headers))
+
+    def priority_request(self, uri, consumer, extra_headers=None):
+        self.queue.insert(0, (uri, consumer, extra_headers))
+
+    def purge(self):
+        for channel in asyncore.socket_map.values():
+            channel.close()
+        del self.queue[:]
+
+    def prioritize(self, priority_uri):
+        i = 0
+        for uri, consumer, extra_headers in self.queue:
+            if uri == priority_uri:
+                del self.queue[i]
+                self.priority_request(uri, consumer, extra_headers)
+                return
+            i = i + 1
+
+    def poll(self, timeout=0.1):
+        # sanity checks
+        now = time.time()
+        for channel in asyncore.socket_map.values():
+            if channel.bytes_in > self.max_size:
+                channel.close() # too much data
+                try:
+                    channel.consumer.http(
+                        0, channel, ("HTTPManager", "too much data", None)
+                        )
+                except:
+                    pass
+            if channel.timestamp and now - channel.timestamp > self.max_time:
+                channel.close() # too slow
+                try:
+                    channel.consumer.http(
+                        0, channel, ("HTTPManager", "timeout", None)
+                        )
+                except:
+                    pass
+        # activate up to max_connections channels
+        while self.queue and len(asyncore.socket_map) < self.max_connections:
+            http_client.do_request(*self.queue.pop(0))
+        # keep the network running
+        asyncore.poll(timeout)
+        # return non-zero if we should keep on polling
+        return len(self.queue) or len(asyncore.socket_map)
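
Finally, a minimal sketch (Python 2 again) of driving http_manager:
queue the requests up front, then call poll() until both the request
queue and the asyncore socket map have drained. The report_consumer
class is hypothetical, written just for this example.

    from effbot.org import http_manager

    class report_consumer:
        # hypothetical consumer: counts bytes, reports completion
        def __init__(self, uri):
            self.uri = uri
            self.bytes = 0
        def feed(self, data):
            self.bytes = self.bytes + len(data)
        def close(self):
            print self.uri, "->", self.bytes, "bytes"
        def http(self, ok, connection, *args):
            if not ok:
                print self.uri, "failed", args

    manager = http_manager.http_manager()
    for uri in ("http://www.cnn.com/", "http://effbot.org/"):
        manager.request(uri, report_consumer(uri))

    # poll returns non-zero while there is still work to do
    while manager.poll(0.1):
        pass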


