r225 - branches/rewrite/src

Otavio Salvador partial-mirror-devel@lists.alioth.debian.org
Tue, 21 Sep 2004 10:44:36 -0600


Author: otavio
Date: Tue Sep 21 10:44:35 2004
New Revision: 225

Added:
   branches/rewrite/src/Download.py   (contents, props changed)
Log:
Add my initial draft of the Download manager class.

Added: branches/rewrite/src/Download.py
==============================================================================
--- (empty file)
+++ branches/rewrite/src/Download.py	Tue Sep 21 10:44:35 2004
@@ -0,0 +1,123 @@
+# debpartial-mirror - partial debian mirror package tool
+# (c) 2004 Otavio Salvador <otavio@debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+# $Id$
+
+import pycurl, sys, string
+
+import signal
+
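+# Progress callback handed to libcurl; all four arguments are byte counts.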
+def progress(download_t, download_d, upload_t, upload_d):
+    print "Total to download", download_t
+    print "Total downloaded", download_d
+    print "Total to upload", upload_t
+    print "Total uploaded", upload_d
+
+class Download:
+    # Download queue (class attribute, shared by every instance)
+    queue = []
+    # Fetcher to use, created on first instantiation and then reused
+    fetcher = None
+
+    def __init__(self, uri, destine):
+        if (uri, destine) not in self.queue:
+            self.queue.append((uri, destine))
+
+        print "Queue length:", len(self.queue)
+
+        if not self.fetcher:
+            # Assign to the class so every instance shares one fetcher.
+            Download.fetcher = DownloadFetcher(2)
+        else:
+            print "Using previous fetcher..."
+
+class DownloadFetcher:
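+    # CurlMulti stack and its easy handles, built in __init__.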
+    __objs = None
+    
+    def __init__(self, max_connections = 2):
+        """ We should ignore SIGPIPE when using pycurl.NOSIGNAL - see
+        the libcurl documentation `libcurl-the-guide' for more info."""
+        signal.signal(signal.SIGPIPE, signal.SIG_IGN)
+
+        """ Make the needed objects to handle the connections."""
+        self.__objs = pycurl.CurlMulti()
+        self.__objs.handles = []
+        for i in range(max_connections):
+            c = pycurl.Curl()
+            c.fp = None
+            c.setopt(pycurl.HTTPHEADER, ["User-Agent: debpartial-mirror"])
+            c.setopt(pycurl.FOLLOWLOCATION, 1)
+            c.setopt(pycurl.MAXREDIRS, 5)
+            c.setopt(pycurl.CONNECTTIMEOUT, 30)
+            c.setopt(pycurl.TIMEOUT, 300)
+            c.setopt(pycurl.NOSIGNAL, 1)
+            c.setopt(pycurl.NOPROGRESS, 0)  # progress callbacks are off by default
+            c.setopt(pycurl.PROGRESSFUNCTION, progress)
+            self.__objs.handles.append(c)
+
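+        # Main transfer loop: hand queued (url, filename) pairs to idle
+        # handles, run the multi stack, and recycle handles as transfers end.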
+        free = self.__objs.handles[:]
+        num_processed = 0
+        while 1:
+            while Download.queue and free:
+                url, filename = Download.queue.pop(0)
+                c = free.pop()
+                c.fp = open(filename, "wb")
+                c.setopt(pycurl.URL, url)
+                c.setopt(pycurl.WRITEDATA, c.fp)
+                self.__objs.add_handle(c)
+                # store some info
+                c.filename = filename
+                c.url = url
+
+            # Run the internal curl state machine for the multi stack
+            while 1:
+                ret, num_handles = self.__objs.perform()
+                if ret != pycurl.E_CALL_MULTI_PERFORM:
+                    break
+
+            # Check for curl objects which have terminated, and add them to the freelist
+            while 1:
+                num_q, ok_list, err_list = self.__objs.info_read()
+                for c in ok_list:
+                    c.fp.close()
+                    c.fp = None
+                    self.__objs.remove_handle(c)
+                    print "Success:", c.filename, c.url, c.getinfo(pycurl.EFFECTIVE_URL)
+                    free.append(c)
+                for c, errno, errmsg in err_list:
+                    c.fp.close()
+                    c.fp = None
+                    self.__objs.remove_handle(c)
+                    print "Failed: ", c.filename, c.url, errno, errmsg
+                    free.append(c)
+                num_processed = num_processed + len(ok_list) + len(err_list)
+                if num_q == 0:
+                    break
+
+            # Stop once the queue is drained and every handle is idle again.
+            if not Download.queue and len(free) == len(self.__objs.handles):
+                break
+
+            # Currently no more I/O is pending, could do something in the meantime
+            # (display a progress bar, etc.).
+            # We just use select() to wait until some more data is available.
+            self.__objs.select(1.0)
+
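+# Test driver: queue each URL listed in the file named on the command
+# line for download as doc_1, doc_2, ...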
+if __name__ == "__main__":
+    urls = open(sys.argv[1]).readlines()
+    fileno = 1
+    for url in urls:
+        url = string.strip(url)
+        if not url or url[0] == "#":
+            continue
+        filename = "doc_%d" % (fileno)
+        Download(url, filename)
+        fileno = fileno + 1
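
For reference, a minimal sketch of how the draft behaves when driven as a
library (the URLs below are placeholders, not part of the commit): the first
Download() call builds the shared DownloadFetcher, whose constructor drains
whatever is queued at that moment before returning, while anything queued
afterwards only waits in Download.queue.

    # Sketch only; URLs are placeholders.  The first call blocks inside
    # DownloadFetcher.__init__ until "Release" is on disk; the second
    # merely appends to the class-level queue (nothing drains it again).
    from Download import Download

    Download("http://ftp.debian.org/debian/dists/sid/Release", "Release")
    Download("http://ftp.debian.org/debian/dists/sid/Release.gpg", "Release.gpg")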