[Pkg-bazaar-commits] r131 ./bzr-builddeb/people/jdw/merge_upstream: Add an (untested) import-snapshot command that imports from snapshot.d.n.

James Westby jw+debian at jameswestby.net
Tue Jun 26 20:55:43 UTC 2007


------------------------------------------------------------
revno: 131
committer: James Westby <jw+debian at jameswestby.net>
branch nick: merge_upstream
timestamp: Tue 2007-06-26 21:55:43 +0100
message:
  Add an (untested) import-snapshot command that imports from snapshot.d.n.
  
  The command gets the old versions of the package from s.d.n and imports them
  all into a new branch for you.
  
  Also fix up the import code to only import a file once.
  
  There needs to be some testing for this, but I need to work out how to do it,
  and there will probably be some complexity in setting it up. Also, the way
  the implementation is done means that there are lots of special cases. These
  should be removed.
modified:
  __init__.py
  import_dsc.py
  tests/test_import_dsc.py
-------------- next part --------------
=== modified file '__init__.py'
--- a/__init__.py	2007-06-24 15:01:46 +0000
+++ b/__init__.py	2007-06-26 20:55:43 +0000
@@ -330,6 +330,21 @@
 register_command(cmd_merge_upstream)
 
 
+class cmd_import_snapshot(Command):
+
+  takes_args = ['package', 'directory?']
+
+  def run(self, package, directory=None):
+    from import_dsc import SnapshotImporter
+    if directory is None:
+      directory = package
+    importer = SnapshotImporter(package)
+    importer.do_import(directory)
+
+
+register_command(cmd_import_snapshot)
+
+
 def test_suite():
     from unittest import TestSuite
     import tests

=== modified file 'import_dsc.py'
--- a/import_dsc.py	2007-06-26 18:45:53 +0000
+++ b/import_dsc.py	2007-06-26 20:55:43 +0000
@@ -28,9 +28,12 @@
 
 from bzrlib import (bzrdir,
                     generate_ids,
+                    urlutils,
                     )
 from bzrlib.errors import FileExists, BzrError
+from bzrlib.trace import warning, info
 from bzrlib.transform import TreeTransform
+from bzrlib.transport import get_transport
 
 from bzrlib.plugins.bzrtools.upstream_import import (import_tar,
                                                      common_directory,
@@ -43,16 +46,31 @@
 # TODO: support native packages (should be easy).
 # TODO: Use a transport to retrieve the files, so that they can be got remotely
 
+def open_file(path, transport, base_dir=None):
+  """Open a file, possibly over a transport.
+
+  Open the named path, using the transport if not None. If the transport and
+  base_dir are not None, then path will be interpreted relative to base_dir.
+  """
+  if transport is None:
+    return open(path, 'rb')
+  else:
+    if base_dir is not None:
+      path = urlutils.join(base_dir, path)
+    return transport.get(path)
+
+
 class DscCache(object):
 
-  def __init__(self):
+  def __init__(self, transport=None):
     self.cache = {}
+    self.transport = transport
 
   def get_dsc(self, name):
     if name in self.cache:
       dsc1 = self.cache[name]
     else:
-      f1 = open(name)
+      f1 = open_file(name, self.transport)
       try:
         dsc1 = deb822.Dsc(f1)
       finally:
@@ -77,8 +95,9 @@
     return -1
 
 
-def import_orig(tree, origname, version, last_upstream=None):
-  f = open(origname, 'rb')
+def import_orig(tree, origname, version, last_upstream=None, transport=None,
+                base_dir=None):
+  f = open_file(origname, transport, base_dir=base_dir)
   try:
     dangling_revid = None
     if last_upstream is not None:
@@ -92,8 +111,6 @@
       revno = tree.branch.revision_id_to_revno(old_upstream_revid)
       tree.branch.set_last_revision_info(revno, old_upstream_revid)
     tree.commit('import upstream from %s' % (os.path.basename(origname)))
-    if last_upstream is not None:
-      tree.merge_from_branch(tree.branch, to_revision=dangling_revid)
     upstream_version = version.upstream_version
     tree.branch.tags.set_tag(make_upstream_tag(upstream_version),
                              tree.branch.last_revision())
@@ -102,7 +119,8 @@
   return dangling_revid
 
 
-def import_diff(tree, diffname, version, dangling_revid=None):
+def import_diff(tree, diffname, version, dangling_revid=None,
+                transport=None, base_dir=None):
   upstream_version = version.upstream_version
   up_revid = tree.branch.tags.lookup_tag(make_upstream_tag(upstream_version))
   up_tree = tree.branch.repository.revision_tree(up_revid)
@@ -111,8 +129,9 @@
   else:
     current_revid = dangling_revid
   current_tree = tree.branch.repository.revision_tree(current_revid)
-  tree.revert(['.'], tree.branch.repository.revision_tree(up_revid))
-  f = gzip.GzipFile(diffname, 'rb')
+  tree.revert([], tree.branch.repository.revision_tree(up_revid))
+  f = open_file(diffname, transport, base_dir=base_dir)
+  f = gzip.GzipFile(fileobj=f)
   try:
     cmd = ['patch', '--strip', '1', '--quiet', '--directory', tree.basedir]
     child_proc = Popen(cmd, stdin=PIPE)
@@ -166,22 +185,32 @@
           tree.add([path])
         else:
           tree.add([path], [file_id])
+    if dangling_revid is not None:
+      tree.add_parent_tree_id(dangling_revid)
     tree.commit('merge packaging changes from %s' % \
                 (os.path.basename(diffname)))
   finally:
     f.close()
 
 
-def import_dsc(target_dir, dsc_files):
+def import_dsc(target_dir, dsc_files, transport=None):
   if os.path.exists(target_dir):
     raise FileExists(target_dir)
-  cache = DscCache()
+  cache = DscCache(transport=transport)
   dsc_files.sort(cmp=DscComp(cache).cmp)
   safe_files = []
+  package_name = None
   for dscname in dsc_files:
     dsc = cache.get_dsc(dscname)
     orig_file = None
     diff_file = None
+    if package_name is not None and dsc['Source'] != package_name:
+      raise ImportError("The reported package name has changed from %s to "
+                        "%s. I don't know what to do in this case. If this "
+                        "case should be handled, please contact the author "
+                        "with details of your case, and the expected outcome."
+                        % (package_name, dsc['Source']))
+    package_name = dsc['Source']
     for file_details in dsc['files']:
       name = file_details['name']
       if name.endswith('.orig.tar.gz'):
@@ -196,8 +225,15 @@
       raise ImportError("%s contains only a .orig.tar.gz, it must contain a "
                         ".diff.gz as well" % dscname)
     version = Version(dsc['Version'])
+    base_dir = urlutils.split(dscname)[0]
     if orig_file is not None:
-      safe_files.append((orig_file, version, 'orig'))
+      found = False
+      for safe_file in safe_files:
+        if orig_file == safe_file[0]:
+          found = True
+          break
+      if not found:
+        safe_files.append((orig_file, version, 'orig', base_dir))
     found = False
     for safe_file in safe_files:
       if safe_file[0].endswith("_%s.orig.tar.gz" % version.upstream_version):
@@ -206,7 +242,13 @@
     if found == False:
       raise ImportError("There is no upstream version corresponding to %s" % \
                           diff_file)
-    safe_files.append((diff_file, version, 'diff'))
+    found = False
+    for safe_file in safe_files:
+      if diff_file == safe_file[0]:
+        found = True
+        break
+    if not found:
+      safe_files.append((diff_file, version, 'diff', base_dir))
   os.mkdir(target_dir)
   format = bzrdir.format_registry.make_bzrdir('dirstate-tags')
   branch  = bzrdir.BzrDir.create_branch_convenience(target_dir,
@@ -216,14 +258,94 @@
   try:
     last_upstream = None
     dangling_revid = None
-    for (filename, version, type) in safe_files:
+    for (filename, version, type, base_dir) in safe_files:
       if type == 'orig':
         dangling_revid = import_orig(tree, filename, version,
-                                     last_upstream=last_upstream)
+                                     last_upstream=last_upstream,
+                                     transport=transport,
+                                     base_dir=base_dir)
+        info("imported %s" % filename)
         last_upstream = version.upstream_version
       elif type == 'diff':
-        import_diff(tree, filename, version, dangling_revid=dangling_revid)
+        import_diff(tree, filename, version, dangling_revid=dangling_revid,
+                    transport=transport, base_dir=base_dir)
+        info("imported %s" % filename)
         dangling_revid = None
   finally:
     tree.unlock()
 
+
+class SourcesImporter(object):
+  """For importing all the .dsc files from a Sources file."""
+
+  def __init__(self, base, sources_path):
+    """Create a SourcesImporter.
+
+    :param base: the base URI from which all paths should be interpreted.
+    :type base: string
+    :param sources_path: the path to the Sources file to import the
+                         packages from, relative to the base parameter.
+    :type base: string
+    """
+    self.base = urlutils.normalize_url(base)
+    if isinstance(sources_path, unicode):
+      sources_path = sources_path.encode('utf-8')
+    self.sources_path = sources_path
+
+  def do_import(self, target):
+    """Perform the import, with the resulting branch in ``target``.
+
+    :param target: the path to the branch that should be created for the
+                   import. The path cannot already exist.
+    :type target: string.
+    """
+    transport = get_transport(self.base)
+    sources_file = transport.get(self.sources_path)
+    if self.sources_path.endswith(".gz"):
+      sources_file = gzip.GzipFile(fileobj=sources_file)
+    dsc_files = []
+    for source in sources_file.read().split('\n\n'):
+      if source == '':
+        continue
+      source = deb822.Sources(source)
+      base_dir = source['Directory']
+      if not self._check_basedir(base_dir):
+        continue
+      for file_info in source['files']:
+        name = file_info['name']
+        if name.endswith('.dsc'):
+          dsc_files.append(urlutils.join(base_dir, name))
+    import_dsc(target, dsc_files, transport=transport)
+
+  def _check_basedir(self, base_dir):
+    return True
+
+
+class SnapshotImporter(SourcesImporter):
+  """Import all versions of a package recorded on snapshot.debian.net."""
+
+  def __init__(self, package_name):
+    base = 'http://snapshot.debian.net/archive/'
+    path = 'pool/%s/%s/source/Sources.gz' % (package_name[0], package_name)
+    super(SnapshotImporter, self).__init__(base, path)
+    warning("snapshot.debian.net has lost packages from before 12/03/2005, "
+            "only packages from after that date will be imported.")
+
+  def _check_basedir(self, base_dir):
+    import re
+    match = re.match(r'(?P<year>\d\d\d\d)/(?P<month>\d\d)/(?P<day>\d\d)',
+                     base_dir)
+    if match is not None:
+      year = int(match.group('year'))
+      if year < 2005:
+        return False
+      if year == 2005:
+        month = int(match.group('month'))
+        if month < 3:
+          return False
+        if month == 3:
+          day = int(match.group('day'))
+          if day < 13:
+            return False
+    return True
+

=== modified file 'tests/test_import_dsc.py'
--- a/tests/test_import_dsc.py	2007-06-26 18:45:53 +0000
+++ b/tests/test_import_dsc.py	2007-06-26 20:55:43 +0000
@@ -122,9 +122,10 @@
     os.system('diff -Nru %s %s | gzip -9 - > %s' % (self.basedir, diffdir,
                                                    self.diff_2))
 
-  def make_dsc(self, filename, version, file1, extra_files=[]):
+  def make_dsc(self, filename, version, file1, extra_files=[],
+               package='package'):
     write_to_file(filename, """Format: 1.0
-Source: package
+Source: %s
 Version: %s
 Binary: package
 Maintainer: maintainer <maint at maint.org>
@@ -133,7 +134,7 @@
 Build-Depends: debhelper (>= 5.0.0)
 Files:
  8636a3e8ae81664bac70158503aaf53a 1328218 %s
-""" % (version, file1))
+""" % (package, version, file1))
     i = 1
     for extra_file in extra_files:
       append_to_file(filename,
@@ -150,6 +151,10 @@
     self.make_diff_1b()
     self.make_dsc(self.dsc_1b, '0.1-2', self.diff_1b)
 
+  def make_dsc_1b_repeated_orig(self):
+    self.make_diff_1b()
+    self.make_dsc(self.dsc_1b, '0.1-2', self.orig_1, [self.diff_1b])
+
   def make_dsc_1c(self):
     self.make_diff_1c()
     self.make_dsc(self.dsc_1c, '0.1-3', self.diff_1c)
@@ -168,6 +173,11 @@
     self.make_dsc_1b()
     import_dsc(self.target, [self.dsc_1, self.dsc_1b])
 
+  def import_dsc_1b_repeated_diff(self):
+    self.make_dsc_1()
+    self.make_dsc_1b()
+    import_dsc(self.target, [self.dsc_1, self.dsc_1b, self.dsc_1b])
+
   def import_dsc_1c(self):
     self.make_dsc_1()
     self.make_dsc_1b()
@@ -182,6 +192,14 @@
     import_dsc(self.target,
                [self.dsc_1, self.dsc_1b, self.dsc_1c, self.dsc_2])
 
+  def import_dsc_2_repeated_orig(self):
+    self.make_dsc_1()
+    self.make_dsc_1b_repeated_orig()
+    self.make_dsc_1c()
+    self.make_dsc_2()
+    import_dsc(self.target,
+               [self.dsc_1, self.dsc_1b, self.dsc_1c, self.dsc_2])
+
   def test_import_dsc_target_extant(self):
     os.mkdir(self.target)
     write_to_file('package_0.1.dsc', '')
@@ -299,6 +317,33 @@
     self.assertEqual(modified[0][3], True)
     self.assertEqual(modified[0][4], False)
 
+  def test_import_two_dsc_one_upstream_history_repeated_diff(self):
+    self.import_dsc_1b_repeated_diff()
+    tree = WorkingTree.open(self.target)
+    rh = tree.branch.revision_history()
+    self.assertEqual(len(rh), 3)
+    msg = tree.branch.repository.get_revision(rh[0]).message
+    self.assertEqual(msg, 'import upstream from %s' % self.orig_1)
+    msg = tree.branch.repository.get_revision(rh[1]).message
+    self.assertEqual(msg, 'merge packaging changes from %s' % self.diff_1)
+    msg = tree.branch.repository.get_revision(rh[2]).message
+    self.assertEqual(msg, 'merge packaging changes from %s' % self.diff_1b)
+    changes = tree.changes_from(tree.branch.repository.revision_tree(rh[1]))
+    added = changes.added
+    self.assertEqual(len(added), 1, str(added))
+    self.assertEqual(added[0][0], 'debian/control')
+    self.assertEqual(added[0][2], 'file')
+    self.assertEqual(len(changes.removed), 1)
+    self.assertEqual(changes.removed[0][0], 'debian/install')
+    self.assertEqual(changes.removed[0][2], 'file')
+    self.assertEqual(len(changes.renamed), 0)
+    modified = changes.modified
+    self.assertEqual(len(modified), 1)
+    self.assertEqual(modified[0][0], 'debian/changelog')
+    self.assertEqual(modified[0][2], 'file')
+    self.assertEqual(modified[0][3], True)
+    self.assertEqual(modified[0][4], False)
+
   def test_import_three_dsc_one_upstream_tree(self):
     self.import_dsc_1c()
     self.failUnlessExists(self.target)
@@ -436,4 +481,51 @@
     self.assertRaises(ImportError, import_dsc, self.target, [self.dsc_1])
     self.make_dsc(self.dsc_1, '0.1-1', self.orig_1, [self.orig_1, self.diff_1])
     self.assertRaises(ImportError, import_dsc, self.target, [self.dsc_1])
+    self.make_dsc(self.dsc_1, '0.1-1', self.orig_1, [self.diff_1])
+    self.make_dsc(self.dsc_1b, '0.1-2', self.diff_1b, package='otherpackage')
+    self.assertRaises(ImportError, import_dsc, self.target,
+                      [self.dsc_1, self.dsc_1b])
+
+  def test_import_four_dsc_two_upstream_history_repeated_orig(self):
+    self.import_dsc_2_repeated_orig()
+    tree = WorkingTree.open(self.target)
+    rh = tree.branch.revision_history()
+    self.assertEqual(len(rh), 3)
+    msg = tree.branch.repository.get_revision(rh[0]).message
+    self.assertEqual(msg, 'import upstream from %s' % self.orig_1)
+    msg = tree.branch.repository.get_revision(rh[1]).message
+    self.assertEqual(msg, 'import upstream from %s' % self.orig_2)
+    msg = tree.branch.repository.get_revision(rh[2]).message
+    self.assertEqual(msg, 'merge packaging changes from %s' % self.diff_2)
+    parents = tree.branch.repository.revision_tree(rh[1]).get_parent_ids()
+    self.assertEqual(parents, [rh[0]], rh)
+    parents = tree.branch.repository.revision_tree(rh[2]).get_parent_ids()
+    self.assertEqual(len(parents), 2)
+    self.assertEqual(parents[0], rh[1], rh)
+    self.assertEqual(tree.branch.repository.get_revision(parents[1]).message,
+                     'merge packaging changes from %s' % self.diff_1c)
+    # Check the diff against upstream.
+    changes = tree.changes_from(tree.branch.repository.revision_tree(rh[1]))
+    added = changes.added
+    self.assertEqual(len(added), 3, str(added))
+    self.assertEqual(added[0][0], 'debian')
+    self.assertEqual(added[0][2], 'directory')
+    self.assertEqual(added[1][0], 'debian/changelog')
+    self.assertEqual(added[1][2], 'file')
+    self.assertEqual(added[2][0], 'debian/install')
+    self.assertEqual(added[2][2], 'file')
+    self.assertEqual(changes.removed, [])
+    self.assertEqual(changes.modified, [])
+    # Check the diff against last packaging version
+    changes = tree.changes_from(
+                 tree.branch.repository.revision_tree(parents[1]))
+    self.assertEqual(len(changes.added), 1)
+    self.assertEqual(changes.added[0][0], 'NEWS')
+    self.assertEqual(changes.added[0][2], 'file')
+    self.assertEqual(len(changes.removed), 1)
+    self.assertEqual(changes.removed[0][0], 'debian/control')
+    self.assertEqual(changes.removed[0][2], 'file')
+    self.assertEqual(len(changes.modified), 1)
+    self.assertEqual(changes.modified[0][0], 'debian/changelog')
+    self.assertEqual(changes.modified[0][2], 'file')
 



More information about the Pkg-bazaar-commits mailing list