r210 - /debtorrent/branches/unique/uniquely.py
camrdale-guest at users.alioth.debian.org
Thu Aug 9 18:21:55 UTC 2007
Author: camrdale-guest
Date: Thu Aug 9 18:21:55 2007
New Revision: 210
URL: http://svn.debian.org/wsvn/debtorrent/?sc=1&rev=210
Log:
Mostly finished uniquely script.
Modified:
debtorrent/branches/unique/uniquely.py
Modified: debtorrent/branches/unique/uniquely.py
URL: http://svn.debian.org/wsvn/debtorrent/debtorrent/branches/unique/uniquely.py?rev=210&op=diff
==============================================================================
--- debtorrent/branches/unique/uniquely.py (original)
+++ debtorrent/branches/unique/uniquely.py Thu Aug 9 18:21:55 2007
@@ -1,10 +1,12 @@
#!/usr/bin/env python
+"""Process a Release file, creating, finding and updating any torrent files."""
+
import bsddb, sha, binascii
-import os, sys
+import sys
import gzip
-from StringIO import StringIO
-from math import ceil, log
+from bz2 import BZ2File
+from math import ceil
# Some default values
default_piecesize = 512*1024
@@ -12,17 +14,151 @@
default_hash_fields = ["Codename", "Suite", "Component", "Architecture",
                       "PieceSize", "OriginalDate"]
header_order = ["Torrent", "Infohash", "OriginalDate", "Date", "PieceSize",
-                "Codename", "Suite", "Component", "Architecture",
-                "TorrentHashFields"]
+                "NextPiece", "OriginalPieces", "Codename", "Suite",
+                "Component", "Architecture", "TorrentHashFields"]
+
+def get_old(old_file):
+    """Read the headers and piece ordering data from an old file.
+
+    @type old_file: C{string}
+    @param old_file: the old piece ordering file to open
+    @rtype: (C{dictionary}, C{dictionary})
+    @return: the old piece ordering (keys are the file names, values are the
+        starting piece number) and headers
+
+    """
+    pieces = {}
+    headers = {}
+
+    try:
+        f = gzip.open(old_file, 'r')
+
+        # Read the headers from the file
+        for line in f:
+            line = line.rstrip()
+
+            h, v = line.split(":", 1)
+            if h == "PieceNumbers":
+                break
+
+            headers[h] = v[1:]
+
+        # Read the piece ordering from the file
+        for line in f:
+            line = line.rstrip()
+
+            if line[:1] != " ":
+                break
+
+            piece, file = line.split()
+            pieces[file] = int(piece)
+
+        f.close()
+    except:
+        # Just return the empty variables, causing a new torrent to be generated
+        pass
+
+    return pieces, headers
+
+def get_new(filename, old_files, headers, old_all_files, all_pieces,
+            all_new_pieces):
+    """Read the new piece data from a Packages file.
+
+    Reads the Packages file, finding old files in it and copying their data to
+    the new ordering, and adding any new files found to the end of the
+    ordering. The old_files input is modified by removing the found files from
+    it, and the 'NextPiece' header in the input headers is changed.
+
+    Any architecture:all files found are processed and added to the 'all'
+    piece ordering. This is done by modifying the input old_all_files,
+    all_pieces, and all_new_pieces variables.
+
+    @type filename: C{string}
+    @param filename: the Packages file to open and parse
+    @type old_files: C{dictionary}
+    @param old_files: the original piece ordering, keys are the file names,
+        values are the starting piece number
+    @type headers: C{dictionary}
+    @param headers: the original headers
+    @type old_all_files: C{dictionary}
+    @param old_all_files: the original piece ordering for architecture:all
+        files, keys are the file names, values are the starting piece number
+    @type all_pieces: C{dictionary}
+    @param all_pieces: the new piece ordering for architecture:all files,
+        keys are the starting piece numbers, values are the file names
+    @type all_new_pieces: C{list} of (C{string}, C{long})
+    @param all_new_pieces: the file name and file size of the new
+        architecture:all files that have been found
+    @rtype: C{dictionary}
+    @return: the new piece ordering, keys are the starting piece numbers,
+        values are the file names
+
+    """
+
+    # Get the needed header information
+    next_piece = int(headers["NextPiece"])
+    piece_size = int(headers["PieceSize"])
+
+    # Open the possibly compressed file
+    if filename.endswith(".gz"):
+        f = gzip.open(filename, 'r')
+    elif filename.endswith(".bz2"):
+        f = BZ2File(filename, "r")
+    else:
+        f = open(filename, 'r')
+
+    pieces = {}
+
+    p = [None, None, None]
+    for line in f:
+        line = line.rstrip()
+        if line == "":
+            if (p[0] and p[1] and p[2]):
+                # Check which torrent to add the info to
+                if p[2] == 'all':
+                    if p[0] in all_pieces.values():
+                        # Already found the old file
+                        pass
+                    elif p[0] in old_all_files:
+                        # Found old file, so add it
+                        all_pieces[old_all_files[p[0]]] = p[0]
+                        del old_all_files[p[0]]
+                    elif (p[0], p[1]) not in all_new_pieces:
+                        # Found new file, save it for later processing
+                        all_new_pieces.append((p[0], p[1]))
+                else:
+                    if p[0] in old_files:
+                        # Found old file, so add it
+                        pieces[old_files[p[0]]] = p[0]
+                        del old_files[p[0]]
+                    else:
+                        # Add new file to the end of the torrent
+                        pieces[next_piece] = p[0]
+                        next_piece += int(ceil(p[1]/float(piece_size)))
+
+            p = [None, None, None]
+        if line[:9] == "Filename:":
+            p[0] = line[10:]
+        if line[:5] == "Size:":
+            p[1] = long(line[6:])
+        if line[:13] == "Architecture:":
+            p[2] = line[14:]
+
+    f.close()
+
+    headers["NextPiece"] = str(next_piece)
+
+    return pieces
#cache_file = sys.argv[1]
-#%cache = bsddb.btopen(cache_file, "w")
+#cache = bsddb.btopen(cache_file, "w")
# The only input is the Release file to process
releasefile = sys.argv[1]
print "Processing: %s" % releasefile
# Initialize the Release file variables
+release_dir = releasefile.rsplit('/', 1)[0]
origin = ""
label = ""
suite = ""
@@ -42,7 +178,7 @@
    # Read the various headers from the file
    if line[:7] == "Origin:":
-        origin = line[7:]
+        origin = line[8:]
    if line[:6] == "Label:":
        label = line[7:]
    if line[:6] == "Suite:":
@@ -55,17 +191,17 @@
        components = line[12:].split()
    if line[:14] == "Architectures:":
        archs = line[15:].split()
-
+
    # Read multiple lines from the SHA1 section of the file
    if line[:1] != " ":
        read_files = False
    if read_files:
        p = line.split()
-        if len(p) == 3 and p[2].EndsWith("Packages"+extension):
-            packages.append(p[2])
+        if len(p) == 3 and p[2].endswith("Packages"+extension):
+            packages.append(release_dir + "/" + p[2])
            packages_sha1[p[2]] = binascii.a2b_hex(p[0])
-            packages_size[p[2]] = long(p[2])
-    if line[:5] == "SHA1:":
+            packages_size[p[2]] = long(p[1])
+    if line[:7] == "MD5Sum:":
        read_files = True
f.close()
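Only a handful of Release headers matter here, plus the checksum section, whose entries are "hex-digest size path" triples, one per line with a leading space; that leading space is exactly what the line[:1] != " " test uses to detect the end of the section. An invented fragment in that shape:

    Origin: Debian
    Label: Debian
    Suite: unstable
    Codename: sid
    Components: main contrib non-free
    Architectures: i386 amd64
    MD5Sum:
     0c5e9a7d3f214b6f9e8d1a2b3c4d5e6f 10197354 main/binary-i386/Packages.gz
     9f8e7d6c5b4a39281706f5e4d3c2b1a0  2876443 contrib/binary-i386/Packages.gz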
@@ -77,16 +213,35 @@
    # Get the old 'all' data
    all_file = torrent_prefix + component + "_binary-all" + torrent_suffix
    old_all_files, all_headers = get_old(all_file)
+    all_pieces = {}
+    all_new_pieces = []
+
+    # Create the all headers
+    all_headers.setdefault("OriginalDate", date)
+    all_headers["Date"] = date
+    all_headers.setdefault("PieceSize", str(default_piecesize))
+    all_headers.setdefault("NextPiece", str(0))
+    all_headers["Codename"] = codename
+    all_headers["Suite"] = suite
+    all_headers["Component"] = component
+    all_headers["Architecture"] = "all"
+    all_headers.setdefault("TorrentHashFields", " ".join(default_hash_fields))
+    if "Torrent" not in all_headers:
+        sha1 = sha.new()
+        for header in all_headers["TorrentHashFields"].split():
+            sha1.update(all_headers[header])
+        all_headers["Torrent"] = sha1.hexdigest()
    for arch in archs:
        # Find the Packages file that will be parsed
        found = False
        for filename in packages:
-            if filename.find(component) >= 0 and filename.find("binary-"+arch) >= 0:
+            if (filename.find(component) >= 0 and
+                filename.find("binary-"+arch) >= 0):
                found = True
                break
        if not found:
-            print "WARNING: no matching Packages file for component %s, arch %s" % component, arch
+            print "WARNING: no matching Packages file for component %s, arch %s" % (component, arch)
            continue
        packages.pop(packages.index(filename))
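get_new() reads just three fields from each stanza of the matched Packages file, and relies on the blank line between stanzas to know when to process what it has collected; a stanza is only used once all of Filename, Size and Architecture have been seen. An invented stanza, trimmed to the relevant fields:

    Package: apt
    Version: 0.7.6
    Architecture: i386
    Size: 1501284
    Filename: pool/main/a/apt/apt_0.7.6_i386.deb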
@@ -96,16 +251,15 @@
        old_files, headers = get_old(torrent_file)
        # Create the headers
-        if "OriginalDate" not in headers:
-            headers["OriginalDate"] = date
-        if "PieceSize" not in headers:
-            headers["PieceSize"] = default_piecesize
+        headers.setdefault("OriginalDate", date)
+        headers["Date"] = date
+        headers.setdefault("PieceSize", str(default_piecesize))
+        headers.setdefault("NextPiece", str(0))
        headers["Codename"] = codename
        headers["Suite"] = suite
        headers["Component"] = component
        headers["Architecture"] = arch
-        if "TorrentHashFields" not in headers:
-            headers["TorrentHashFields"] = " ".join(default_hash_fields)
+        headers.setdefault("TorrentHashFields", " ".join(default_hash_fields))
        if "Torrent" not in headers:
            sha1 = sha.new()
            for header in headers["TorrentHashFields"].split():
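The "Torrent" identifier created here is a plain SHA-1 over the values of the TorrentHashFields headers, concatenated in order. A standalone sketch of the same computation, using invented header values (sha is the Python 2 module the script already imports):

    import sha

    headers = {"Codename": "sid", "Suite": "unstable", "Component": "main",
               "Architecture": "i386", "PieceSize": "524288",
               "OriginalDate": "Thu, 09 Aug 2007 18:21:55 UTC"}
    sha1 = sha.new()
    for field in ["Codename", "Suite", "Component", "Architecture",
                  "PieceSize", "OriginalDate"]:
        sha1.update(headers[field])
    print sha1.hexdigest()

Because the hash covers OriginalDate rather than Date, regenerating the torrent on a later date keeps the identifier stable as long as none of the hashed fields change.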
@@ -114,59 +268,72 @@
        # Parse the Packages file for the new data
        print "updating ... ",
-        new_files, removed_files = get_new(filename, old_files, headers["PieceSize"])
-
-        # Write the headers
+        new_pieces = get_new(filename, old_files, headers, old_all_files,
+                             all_pieces, all_new_pieces)
+
+        headers.setdefault("OriginalPieces", headers["NextPiece"])
+
+        if new_pieces:
+            # Write the headers
+            print "writing ... ",
+            f = gzip.open(torrent_file, 'w')
+            for header in header_order:
+                if header in headers:
+                    f.write("%s: %s\n" % (header, headers[header]))
+            f.write("PieceNumbers:\n")
+
+            # Write the starting piece numbers
+            pieces = new_pieces.keys()
+            pieces.sort()
+            format_string = " %"+str(len(str(max(pieces))))+"d %s\n"
+            for piece in pieces:
+                f.write(format_string % (piece, new_pieces[piece]))
+
+            f.close()
+        print "done."
+
+    print all_file + ": ",
+    # If there were 'all' files found
+    if all_pieces or all_new_pieces:
+        # Process the new 'all' files found
+        print "updating ... ",
+        next_piece = int(all_headers["NextPiece"])
+        piece_size = int(all_headers["PieceSize"])
+        all_new_pieces.sort()
+        old_file = ""
+        old_size = 0L
+        for (file, size) in all_new_pieces:
+            if file == old_file:
+                if size != old_size:
+                    print "WARNING: multiple architecture:all files with different size:", file
+            else:
+                all_pieces[next_piece] = file
+                next_piece += int(ceil(size/float(piece_size)))
+
+            old_file = file
+            old_size = size
+
+        # Set the new next piece to use
+        all_headers["NextPiece"] = str(next_piece)
+        all_headers.setdefault("OriginalPieces", all_headers["NextPiece"])
+
+        # Write the all_headers
        print "writing ... ",
-        f = gzip.open(torrent_file, 'w')
+        f = gzip.open(all_file, 'w')
        for header in header_order:
-            if header in headers:
-                f.write("%s: %s\n" % header, headers[header])
+            if header in all_headers:
+                f.write("%s: %s\n" % (header, all_headers[header]))
        f.write("PieceNumbers:\n")
-
-        # Write the starting piece numbers
-        pieces = new_pieces.keys()
+
+        # Write the all starting piece numbers
+        pieces = all_pieces.keys()
        pieces.sort()
-        format_string = " %"+str(int(ceil(log(max(pieces), 10))))+"d %s\n"
+        format_string = " %"+str(len(str(max(pieces))))+"d %s\n"
        for piece in pieces:
-            f.write(format_string % piece, new_pieces[piece])
+            f.write(format_string % (piece, all_pieces[piece]))
        f.close()
-        print "done."
-
-        # Create the all headers
-        if "OriginalDate" not in all_headers:
-            all_headers["OriginalDate"] = date
-        if "PieceSize" not in all_headers:
-            all_headers["PieceSize"] = default_piecesize
-        all_headers["Codename"] = codename
-        all_headers["Suite"] = suite
-        all_headers["Component"] = component
-        all_headers["Architecture"] = "all"
-        if "TorrentHashFields" not in all_headers:
-            all_headers["TorrentHashFields"] = " ".join(default_hash_fields)
-        if "Torrent" not in all_headers:
-            sha1 = sha.new()
-            for header in all_headers["TorrentHashFields"].split():
-                sha1.update(all_headers[header])
-            all_headers["Torrent"] = sha1.hexdigest()
-
-        # Write the all_headers
-        print all_file + ": writing ... ",
-        f = gzip.open(all_file, 'w')
-        for header in header_order:
-            if header in all_headers:
-                f.write("%s: %s\n" % header, all_headers[header])
-        f.write("PieceNumbers:\n")
-
-        # Write the all starting piece numbers
-        pieces = all_new_pieces.keys()
-        pieces.sort()
-        format_string = " %"+str(int(ceil(log(max(pieces), 10))))+"d %s\n"
-        for piece in pieces:
-            f.write(format_string % piece, all_new_pieces[piece])
-
-        f.close()
+
    print "done."
if packages:
@@ -174,11 +341,10 @@
    for package in packages:
        print " %s" % package
-"""
-    fnkey = filename + ":pc"
-    if cache.has_key(fnkey):
-        sha1, result = str2hash(cache[fnkey])
-    cache[fnkey] = values
-"""
+
+#    fnkey = filename + ":pc"
+#    if cache.has_key(fnkey):
+#        sha1, result = str2hash(cache[fnkey])
+#    cache[fnkey] = values
#cache.sync()
#cache.close()
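One small fix buried in the writing code above: the column width for the piece numbers is now len(str(max(pieces))) instead of int(ceil(log(max(pieces), 10))). The old formula is one column short for exact powers of ten, and raises an exception when the largest starting piece is 0. A standalone comparison (not part of the commit):

    from math import ceil, log

    for n in [1, 9, 10, 99, 100]:
        print n, int(ceil(log(n, 10))), len(str(n))
    # 1   -> 0 vs 1  (old width too narrow)
    # 9   -> 1 vs 1
    # 10  -> 1 vs 2  (old width too narrow)
    # 99  -> 2 vs 2
    # 100 -> 2 vs 3  (old width too narrow)
    # log(0, 10) raises ValueError, while len(str(0)) is 1.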