r225 - branches/rewrite/src
Otavio Salvador
partial-mirror-devel@lists.alioth.debian.org
Tue, 21 Sep 2004 10:44:36 -0600
Author: otavio
Date: Tue Sep 21 10:44:35 2004
New Revision: 225
Added:
branches/rewrite/src/Download.py (contents, props changed)
Log:
Add my initial draft for Download manager class.
Added: branches/rewrite/src/Download.py
==============================================================================
--- (empty file)
+++ branches/rewrite/src/Download.py Tue Sep 21 10:44:35 2004
@@ -0,0 +1,123 @@
+# debpartial-mirror - partial debian mirror package tool
+# (c) 2004 Otavio Salvador <otavio@debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+# $Id$
+
+import pycurl, sys, string
+
+import signal
+from signal import SIGPIPE, SIG_IGN
+
+def progress(download_t, download_d, upload_t, upload_d):
+ print "Total to download", download_t
+ print "Total downloaded", download_d
+ print "Total to upload", upload_t
+ print "Total uploaded", upload_d
+
+class Download:
+ """ Download queue """
+ queue = []
+ """ Fetcher to use """
+ fetcher = None
+
+ def __init__(self, uri, destine):
+ if (uri, destine) not in self.queue:
+ self.queue.append((uri, destine))
+
+ print len(self.queue)
+
+ if not self.fetcher:
+ self.fetcher = DownloadFetcher(2)
+ else:
+ print "Usando fetcher anterior..."
+
+class DownloadFetcher:
+ __objs = None
+
+ def __init__(self, max_connections = 2):
+ """ We should ignore SIGPIPE when using pycurl.NOSIGNAL - see
+ the libcurl documentation `libcurl-the-guide' for more info."""
+ signal.signal(signal.SIGPIPE, signal.SIG_IGN)
+
+ """ Make the needed objects to handle the connections."""
+ self.__objs = pycurl.CurlMulti()
+ self.__objs.handles = []
+ for i in range(max_connections):
+ c = pycurl.Curl()
+ c.fp = None
+ c.setopt(pycurl.HTTPHEADER, ["User-Agent: debpartial-mirror"])
+ c.setopt(pycurl.FOLLOWLOCATION, 1)
+ c.setopt(pycurl.MAXREDIRS, 5)
+ c.setopt(pycurl.CONNECTTIMEOUT, 30)
+ c.setopt(pycurl.TIMEOUT, 300)
+ c.setopt(pycurl.NOSIGNAL, 1)
+ c.setopt(pycurl.PROGRESSFUNCTION, progress)
+ self.__objs.handles.append(c)
+
+ free = self.__objs.handles[:]
+ num_processed = 0
+ while 1:
+ while Download.queue and free:
+ url, filename = Download.queue.pop(0)
+ c = free.pop()
+ c.fp = open(filename, "wb")
+ c.setopt(pycurl.URL, url)
+ c.setopt(pycurl.WRITEDATA, c.fp)
+ self.__objs.add_handle(c)
+ # store some info
+ c.filename = filename
+ c.url = url
+
+ # Run the internal curl state machine for the multi stack
+ while 1:
+ ret, num_handles = self.__objs.perform()
+ if ret != pycurl.E_CALL_MULTI_PERFORM:
+ break
+
+ # Check for curl objects which have terminated, and add them to the freelist
+ while 1:
+ num_q, ok_list, err_list = self.__objs.info_read()
+ for c in ok_list:
+ c.fp.close()
+ c.fp = None
+ self.__objs.remove_handle(c)
+ print "Success:", c.filename, c.url, c.getinfo(pycurl.EFFECTIVE_URL)
+ freelist.append(c)
+ for c, errno, errmsg in err_list:
+ c.fp.close()
+ c.fp = None
+ self.__objs.remove_handle(c)
+ print "Failed: ", c.filename, c.url, errno, errmsg
+ freelist.append(c)
+ num_processed = num_processed + len(ok_list) + len(err_list)
+ if num_q == 0:
+ break
+
+ # Currently no more I/O is pending, could do something in the meantime
+ # (display a progress bar, etc.).
+ # We just use select() to wait until some more data is available.
+ self.__objs.select()
+
# Command-line driver: read a URL list (one per line; blank lines and
# lines starting with '#' are skipped) from the file named by argv[1]
# and queue each URL for download as doc_1, doc_2, ...
url_file = open(sys.argv[1])
try:
    urls = url_file.readlines()
finally:
    # BUG FIX: the original never closed the handle returned by open().
    # (`with` is avoided: this file predates Python 2.5.)
    url_file.close()
fileno = 1
for url in urls:
    url = string.strip(url)
    if not url or url[0] == "#":
        continue
    filename = "doc_%d" % (fileno)
    Download(url, filename)
    fileno = fileno + 1
del fileno