r219 - /debtorrent/branches/unique/uniquely.py
camrdale-guest at users.alioth.debian.org
Sat Aug 11 20:48:51 UTC 2007
Author: camrdale-guest
Date: Sat Aug 11 20:48:51 2007
New Revision: 219
URL: http://svn.debian.org/wsvn/debtorrent/?sc=1&rev=219
Log:
Update uniquely to order the pieces by full path name, and rewrite for readability.
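For context, the reworked add_new() now sorts the newly discovered (path, size) tuples before assigning piece numbers, so files enter the torrent ordered by their full pool path rather than by the order in which they appear in the Packages file. A minimal sketch of the idea, with hypothetical file names and sizes (piece size as in DEFAULT_PIECESIZE):

    from math import ceil

    piece_size = 512*1024
    next_piece = 0
    pieces = {}

    # Hypothetical new files found in a Packages file: (full path, size)
    new_pieces = [("pool/main/b/bar/bar_1.0_all.deb", 700000L),
                  ("pool/main/a/foo/foo_1.0_all.deb", 300000L)]

    # Sorting the (path, size) tuples orders the files by full path name
    new_pieces.sort()
    for (name, size) in new_pieces:
        pieces[next_piece] = name
        next_piece += int(ceil(size/float(piece_size)))

    # foo (1 piece) now starts at piece 0, bar (2 pieces) at piece 1
    print pieces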
Modified:
debtorrent/branches/unique/uniquely.py
Modified: debtorrent/branches/unique/uniquely.py
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/branches/unique/uniquely.py?rev=219&op=diff
==============================================================================
--- debtorrent/branches/unique/uniquely.py (original)
+++ debtorrent/branches/unique/uniquely.py Sat Aug 11 20:48:51 2007
@@ -2,35 +2,93 @@
"""Process a Release file, creating, finding and updating any torrent files."""
-import bsddb, sha, binascii
+import sha
import sys
import gzip
from bz2 import BZ2File
from math import ceil
from os import remove
from os.path import exists
-
-# Some default values
-default_piecesize = 512*1024
-extension = ".gz"
-# can not contain Date, Infohash, NextPiece or OriginalPieces
-default_hash_fields = ["Codename", "Suite", "Component", "Architecture",
+from time import strftime, gmtime
+
+# The piece size to use (must match the '-extrapieces' file's piece size)
+DEFAULT_PIECESIZE = 512*1024
+
+# The Packages files to read
+EXTENSION = ".gz"
+
+# The fields to hash to determine the torrent identifier
+# (cannot contain Date, Infohash, NextPiece or OriginalPieces)
+DEFAULT_HASH_FIELDS = ["Codename", "Suite", "Component", "Architecture",
"PieceSize", "OriginalDate"]
-default_tracker = "http://dttracker.debian.net:6969/announce"
-header_order = ["Torrent", "Infohash", "InfohashArchs", "OriginalDate", "Date",
+
+# The tracker announce URL to use
+DEFAULT_TRACKER = "http://dttracker.debian.net:6969/announce"
+
+# The order to write the headers in (headers not listed won't be written)
+HEADER_ORDER = ["Torrent", "Infohash", "InfohashArchs", "OriginalDate", "Date",
"PieceSize", "NextPiece", "OriginalPieces", "Codename", "Suite",
"Component", "Architecture", "Tracker", "TorrentHashFields"]
+def read_release(filename):
+ """Read the headers and Packages file names from a Release file.
+
+ @type filename: C{string}
+ @param filename: the Release file to read
+ @rtype: C{dictionary}, C{list} of C{string}
+ @return: the headers and full file names of Packages files
+
+ """
+
+ # Initialize the Release file variables
+ release_dir = filename.rsplit('/', 1)[0]
+ read_packages = False
+ headers = {}
+ packages = []
+
+ f = open(filename, 'r')
+
+ for line in f:
+ line = line.rstrip()
+
+ if line[:1] != " ":
+ read_packages = False
+ try:
+ # Read the various headers from the file
+ h, v = line.split(":", 1)
+ if h == "MD5Sum" or h == "SHA1" or h == "SHA256":
+ read_packages = True
+ elif len(v) > 0:
+ headers[h] = v[1:]
+ except ValueError:
+ # Bad header line, just ignore it
+ print "WARNING: Ignoring badly formatted Release line:", line
+
+ # Skip to the next line
+ continue
+
+ # Read file names from the multiple hash sections of the file
+ if read_packages:
+ p = line.split()
+ if len(p) == 3 and p[2].endswith("Packages"+EXTENSION):
+ if release_dir + "/" + p[2] not in packages:
+ packages.append(release_dir + "/" + p[2])
+
+ f.close()
+
+ return headers, packages
+
def get_old(old_file):
"""Read the headers and piece ordering data from an old file.
@type old_file: C{string}
@param old_file: the old piece ordering file to open
- @rtype: (C{dictionary}, C{dictionary})
+ @rtype: C{dictionary}, C{dictionary}
@return: the old piece ordering (keys are the file names, values are the
starting piece number) and headers
"""
+
pieces = {}
headers = {}
@@ -62,12 +120,64 @@
# Delete the file and return empty variables to create a new torrent
if exists(old_file):
remove(old_file)
- pass
return pieces, headers
-def get_new(filename, old_files, headers, old_all_files, all_pieces,
- all_new_pieces):
+def update_headers(headers, release_headers, component, arch):
+ """Update the headers with new fields from the Release file.
+
+ @type headers: C{dictionary}
+ @param headers: the headers from the piece ordering file
+ @type release_headers: C{dictionary}
+ @param release_headers: the headers from the Release file
+ @type component: C{string}
+ @param component: the component name (e.g. main, contrib, non-free)
+ @type arch: C{string}
+ @param arch: the architecture name (e.g. i386, amd64, all)
+ @rtype: C{boolean}
+ @return: whether a new torrent has been created
+
+ """
+
+ # Set any required Release headers
+ if len(release_headers.get("Date", "")) == 0:
+ # Use today's date
+ release_headers["Date"] = strftime('%a, %d %b %Y %H:%M:%S +0000', gmtime())
+
+ # Create/update the headers
+ headers.setdefault("OriginalDate", release_headers["Date"])
+ headers["Date"] = release_headers["Date"]
+ headers.setdefault("PieceSize", str(DEFAULT_PIECESIZE))
+ headers.setdefault("NextPiece", str(0))
+ headers["Codename"] = release_headers.get("Codename", "")
+ headers["Suite"] = release_headers.get("Suite", "")
+ headers["Component"] = component
+ headers["Architecture"] = arch
+ headers.setdefault("Tracker", DEFAULT_TRACKER)
+ headers.setdefault("TorrentHashFields", " ".join(DEFAULT_HASH_FIELDS))
+
+ # Calculate the new hash
+ sha1 = sha.new()
+ for header in headers["TorrentHashFields"].split():
+ sha1.update(headers[header])
+ new_hash = sha1.hexdigest()
+
+ # Check if the hash has changed
+ if headers.get("Torrent", "") == new_hash:
+ return False
+ else:
+ # If it has, then reset the torrent to create a new one
+ headers["OriginalDate"] = release_headers["Date"]
+ headers["NextPiece"] = str(0)
+ headers.pop("OriginalPieces", "")
+ sha1 = sha.new()
+ for header in headers["TorrentHashFields"].split():
+ sha1.update(headers[header])
+ headers["Torrent"] = sha1.hexdigest()
+
+ return True
+
+def get_new(filename, old_files, old_all_files, all_pieces, all_new_pieces):
"""Read the new piece data from a Packages file.
Reads the Packages file, finding old files in it and copying their data to
@@ -84,8 +194,6 @@
@type old_files: C{dictionary}
@param old_files: the original piece ordering, keys are the file names,
values are the starting piece number
- @type headers: C{dictionary}
- @param headers: the original headers
@type old_all_files: C{dictionary}
@param old_all_files: the original piece ordering for architecture:all
files, keys are the file names, values are the starting piece number
@@ -101,10 +209,6 @@
"""
- # Get the needed header information
- next_piece = int(headers["NextPiece"])
- piece_size = int(headers["PieceSize"])
-
# Open the possibly compressed file
if filename.endswith(".gz"):
f = gzip.open(filename, 'r')
@@ -114,6 +218,7 @@
f = open(filename, 'r')
pieces = {}
+ new_pieces = []
p = [None, None, None]
for line in f:
@@ -138,9 +243,8 @@
pieces[old_files[p[0]]] = p[0]
del old_files[p[0]]
else:
- # Add new file to the end of the torrent
- pieces[next_piece] = p[0]
- next_piece += int(ceil(p[1]/float(piece_size)))
+ # Found new file, save it for later processing
+ new_pieces.append((p[0], p[1]))
p = [None, None, None]
if line[:9] == "Filename:":
@@ -152,258 +256,186 @@
f.close()
+ return pieces, new_pieces
+
+def add_new(pieces, new_pieces, headers):
+ """Read the new piece data from a Packages file.
+
+ Adds new files to the end of the piece ordering. The 'pieces' input is
+ modified by having the new pieces added to it. The 'new_pieces' input
+ list is sorted. The 'NextPiece' header in the input 'headers' is updated.
+
+ @type pieces: C{dictionary}
+ @param pieces: the current piece ordering, keys are the starting piece
+ numbers, values are the file names
+ @type new_pieces: C{list} of (C{string}, C{long})
+ @param new_pieces: the file name and file size of the new files that have
+ been found and are to be added to the piece ordering
+ @type headers: C{dictionary}
+ @param headers: the headers from the piece ordering file
+
+ """
+
+ # Get the needed header information
+ next_piece = int(headers["NextPiece"])
+ piece_size = int(headers["PieceSize"])
+
+ new_pieces.sort()
+ old_file = ""
+ old_size = 0L
+ for (file, size) in new_pieces:
+ if file == old_file:
+ if size != old_size:
+ print "WARNING: multiple files with different size:", file
+ else:
+ pieces[next_piece] = file
+ next_piece += int(ceil(size/float(piece_size)))
+
+ old_file = file
+ old_size = size
+
+ # Set the final header values
headers["NextPiece"] = str(next_piece)
-
- return pieces
-
-#cache_file = sys.argv[1]
-#cache = bsddb.btopen(cache_file, "w")
-
-# The only input is the Release file to process
-releasefile = sys.argv[1]
-print "Processing: %s" % releasefile
-
-# Initialize the Release file variables
-release_dir = releasefile.rsplit('/', 1)[0]
-origin = ""
-label = ""
-suite = ""
-codename = ""
-date = ""
-components = []
-archs = []
-read_files = False
-packages = []
-packages_sha1 = {}
-packages_size = {}
-
-f = open(releasefile, 'r')
-
-for line in f:
- line = line.rstrip()
-
- # Read the various headers from the file
- if line[:7] == "Origin:":
- origin = line[8:]
- if line[:6] == "Label:":
- label = line[7:]
- if line[:6] == "Suite:":
- suite = line[7:]
- if line[:9] == "Codename:":
- codename = line[10:]
- if line[:5] == "Date:":
- date = line[6:]
- if line[:11] == "Components:":
- components = line[12:].split()
- if line[:14] == "Architectures:":
- archs = line[15:].split()
-
- # Read multiple lines from the SHA1 section of the file
- if line[:1] != " ":
- read_files = False
- if read_files:
- p = line.split()
- if len(p) == 3 and p[2].endswith("Packages"+extension):
- packages.append(release_dir + "/" + p[2])
- packages_sha1[p[2]] = binascii.a2b_hex(p[0])
- packages_size[p[2]] = long(p[1])
- if line[:7] == "MD5Sum:":
- read_files = True
-
-f.close()
-
-torrent_prefix = "dists_" + codename + "_"
-torrent_suffix = "_Packages-torrent.gz"
-
-for component in components:
- # Get the old 'all' data
- all_file = torrent_prefix + component + "_binary-all" + torrent_suffix
- old_all_files, all_headers = get_old(all_file)
- all_pieces = {}
- all_new_pieces = []
- new_all_torrent = False
-
- # Create the all headers
- all_headers.setdefault("OriginalDate", date)
- all_headers["Date"] = date
- all_headers.setdefault("PieceSize", str(default_piecesize))
- all_headers.setdefault("NextPiece", str(0))
- all_headers["Codename"] = codename
- all_headers["Suite"] = suite
- all_headers["Component"] = component
- all_headers["Architecture"] = "all"
- all_headers.setdefault("Tracker", default_tracker)
- all_headers.setdefault("TorrentHashFields", " ".join(default_hash_fields))
-
- # Calculate the new hash
- sha1 = sha.new()
- for header in all_headers["TorrentHashFields"].split():
- sha1.update(all_headers[header])
- new_hash = sha1.hexdigest()
-
- # Check if the hash has changed
- if all_headers.get("Torrent", "") != new_hash:
- # If it has, then reset the torrent
- new_all_torrent = True
- old_all_files = {}
- all_headers["OriginalDate"] = date
- all_headers["NextPiece"] = str(0)
- all_headers.pop("OriginalPieces", "")
- sha1 = sha.new()
- for header in all_headers["TorrentHashFields"].split():
- sha1.update(all_headers[header])
- all_headers["Torrent"] = sha1.hexdigest()
-
- for arch in archs:
- torrent_file = torrent_prefix + component + "_binary-" + arch + torrent_suffix
-
- # Find the Packages file that will be parsed
- found = False
- for filename in packages:
- if (filename.find(component) >= 0 and
- filename.find("binary-"+arch) >= 0):
- found = True
- break
- if not found:
- print "WARNING: no matching Packages file for component %s, arch %s" % (component, arch)
- if exists(torrent_file):
- remove(torrent_file)
- continue
- packages.pop(packages.index(filename))
-
- # Get the old data for this torrent, if any existed
- print torrent_file + ": reading ...",
+ headers.setdefault("OriginalPieces", headers["NextPiece"])
+
+def write_file(filename, pieces, headers):
+ """Print the new data to the file.
+
+ @type filename: C{string}
+ @param filename: the file to write to
+ @type pieces: C{dictionary}
+ @param pieces: the current piece ordering, keys are the starting piece
+ numbers, values are the file names
+ @type headers: C{dictionary}
+ @param headers: the headers from the piece ordering file
+
+ """
+
+ f = gzip.open(filename, 'w')
+
+ # Write the headers
+ for header in HEADER_ORDER:
+ if header in headers:
+ f.write("%s: %s\n" % (header, headers[header]))
+ f.write("PieceNumbers:\n")
+
+ # Write the starting piece numbers
+ ps = pieces.keys()
+ ps.sort()
+ format_string = " %"+str(len(str(max(ps))))+"d %s\n"
+ for p in ps:
+ f.write(format_string % (p, pieces[p]))
+
+ f.close()
+
+def run(releasefile):
+ """Process a single Release file.
+
+ @type releasefile: C{string}
+ @param releasefile: the Release file to process
+
+ """
+
+ # Process the Release file
+ print "Processing: %s" % releasefile
+ release_headers, packages = read_release(releasefile)
+
+ torrent_prefix = "dists_" + release_headers.get("Codename", "") + "_"
+ torrent_suffix = "_Packages-torrent.gz"
+
+ for component in release_headers.get("Components", "").split():
+ # Get the old 'all' data
+ all_file = torrent_prefix + component + "_binary-all" + torrent_suffix
+ old_all_pieces, all_headers = get_old(all_file)
+ all_pieces = {}
+ all_new_pieces = []
+ new_all_torrent = False
+
+ # First update the 'all' headers
+ if update_headers(all_headers, release_headers, component, "all"):
+ # If it has, then reset the torrent
+ new_all_torrent = True
+ old_all_pieces = {}
+
+ for arch in release_headers.get("Architectures", "").split():
+ torrent_file = torrent_prefix + component + "_binary-" + arch + torrent_suffix
+
+ # Find the Packages file that will be parsed
+ found = False
+ for filename in packages:
+ if (filename.find(component) >= 0 and
+ filename.find("binary-"+arch) >= 0):
+ found = True
+ break
+ if not found:
+ print "WARNING: no matching Packages file for component %s, arch %s" % (component, arch)
+ if exists(torrent_file):
+ remove(torrent_file)
+ continue
+ packages.pop(packages.index(filename))
+
+ # Get the old data for this torrent, if any existed
+ print torrent_file + ": reading ...",
+ sys.stdout.flush()
+ old_pieces, headers = get_old(torrent_file)
+
+ # Update the headers from the Release file ones
+ if update_headers(headers, release_headers, component, arch):
+ print "new torrent created ...",
+ sys.stdout.flush()
+ old_pieces = {}
+
+ # Parse the Packages file for the new data
+ print "updating ...",
+ sys.stdout.flush()
+ pieces, new_pieces = get_new(filename, old_pieces, old_all_pieces,
+ all_pieces, all_new_pieces)
+
+ if pieces or new_pieces:
+ # Add any new pieces to the end of pieces
+ add_new(pieces, new_pieces, headers)
+
+ # Write the headers
+ print "writing ...",
+ sys.stdout.flush()
+ write_file(torrent_file, pieces, headers)
+ else:
+ print "empty ...",
+ if exists(torrent_file):
+ remove(torrent_file)
+
+ print "done."
+
+ print all_file + ": reading ...",
+ if new_all_torrent:
+ print "new torrent created ...",
sys.stdout.flush()
- old_files, headers = get_old(torrent_file)
-
- # Create the headers
- headers.setdefault("OriginalDate", date)
- headers["Date"] = date
- headers.setdefault("PieceSize", str(default_piecesize))
- headers.setdefault("NextPiece", str(0))
- headers["Codename"] = codename
- headers["Suite"] = suite
- headers["Component"] = component
- headers["Architecture"] = arch
- headers.setdefault("Tracker", default_tracker)
- headers.setdefault("TorrentHashFields", " ".join(default_hash_fields))
+ # If there were 'all' files found
+ if all_pieces or all_new_pieces:
+ # Process the new 'all' files found
+ print "updating ...",
+ sys.stdout.flush()
+ add_new(all_pieces, all_new_pieces, all_headers)
- # Calculate the new hash
- sha1 = sha.new()
- for header in headers["TorrentHashFields"].split():
- sha1.update(headers[header])
- new_hash = sha1.hexdigest()
-
- # Check if the hash has changed
- if headers.get("Torrent", "") != new_hash:
- # If it has, then reset the torrent
- print "new torrent created ...",
- sys.stdout.flush()
- old_files = {}
- headers["OriginalDate"] = date
- headers["NextPiece"] = str(0)
- headers.pop("OriginalPieces", "")
- sha1 = sha.new()
- for header in headers["TorrentHashFields"].split():
- sha1.update(headers[header])
- headers["Torrent"] = sha1.hexdigest()
-
- # Parse the Packages file for the new data
- print "updating ...",
- sys.stdout.flush()
- new_pieces = get_new(filename, old_files, headers, old_all_files,
- all_pieces, all_new_pieces)
-
- # Set the final header values
- headers.setdefault("OriginalPieces", headers["NextPiece"])
-
- if new_pieces:
- # Write the headers
+ # Write the all_headers
print "writing ...",
sys.stdout.flush()
- f = gzip.open(torrent_file, 'w')
- for header in header_order:
- if header in headers:
- f.write("%s: %s\n" % (header, headers[header]))
- f.write("PieceNumbers:\n")
-
- # Write the starting piece numbers
- pieces = new_pieces.keys()
- pieces.sort()
- format_string = " %"+str(len(str(max(pieces))))+"d %s\n"
- for piece in pieces:
- f.write(format_string % (piece, new_pieces[piece]))
-
- f.close()
+ write_file(all_file, all_pieces, all_headers)
else:
print "empty ...",
- if exists(torrent_file):
- remove(torrent_file)
-
+ if exists(all_file):
+ remove(all_file)
+
print "done."
-
- print all_file + ": reading ...",
- if new_all_torrent:
- print "new torrent created ...",
- sys.stdout.flush()
- # If there were 'all' files found
- if all_pieces or all_new_pieces:
- # Process the new 'all' files found
- print "updating ...",
- sys.stdout.flush()
- next_piece = int(all_headers["NextPiece"])
- piece_size = int(all_headers["PieceSize"])
- all_new_pieces.sort()
- old_file = ""
- old_size = 0L
- for (file, size) in all_new_pieces:
- if file == old_file:
- if size != old_size:
- print "WARNING: multiple architecture:all files with different size:", file
- else:
- all_pieces[next_piece] = file
- next_piece += int(ceil(size/float(piece_size)))
-
- old_file = file
- old_size = size
-
- # Set the final header values
- all_headers["NextPiece"] = str(next_piece)
- all_headers.setdefault("OriginalPieces", all_headers["NextPiece"])
-
- # Write the all_headers
- print "writing ...",
- sys.stdout.flush()
- f = gzip.open(all_file, 'w')
- for header in header_order:
- if header in all_headers:
- f.write("%s: %s\n" % (header, all_headers[header]))
- f.write("PieceNumbers:\n")
-
- # Write the all starting piece numbers
- pieces = all_pieces.keys()
- pieces.sort()
- format_string = " %"+str(len(str(max(pieces))))+"d %s\n"
- for piece in pieces:
- f.write(format_string % (piece, all_pieces[piece]))
-
- f.close()
+
+ if packages:
+ print "The following packages files were not used:"
+ for package in packages:
+ print " %s" % package
+
+if __name__ == '__main__':
+ if len(sys.argv) >= 2:
+ for file in sys.argv[1:]:
+ run(file)
else:
- print "empty ...",
- if exists(all_file):
- remove(all_file)
-
- print "done."
-
-if packages:
- print "The following packages files were not used:"
- for package in packages:
- print " %s" % package
-
-
-# fnkey = filename + ":pc"
-# if cache.has_key(fnkey):
-# sha1, result = str2hash(cache[fnkey])
-# cache[fnkey] = values
-#cache.sync()
-#cache.close()
+ print "Usage: " + sys.argv[0] + " Releasefile [Releasefile ...]"