[Pkg-bazaar-commits] ./bzr-gtk/unstable r27: refactor distances

David Allouche david.allouche at canonical.com
Fri Apr 10 07:15:38 UTC 2009


------------------------------------------------------------
revno: 27
committer: David Allouche <david.allouche at canonical.com>
timestamp: Sat 2005-12-03 02:09:28 +0100
message:
  refactor distances
modified:
  graph.py
-------------- next part --------------
=== modified file 'graph.py'
--- a/graph.py	2005-11-28 00:30:06 +0000
+++ b/graph.py	2005-12-03 01:09:28 +0000
@@ -30,63 +30,44 @@
         self.message = self.revision_id
 
 
-
-def distances(branch, start):
-    """Sort the revisions.
-
-    Traverses the branch revision tree starting at start and produces an
-    ordered list of revisions such that a revision always comes after
-    any revision it is the parent of.
-
-    Returns a tuple of (revids, revisions, colours, children)
-    """
-    revisions = { start: branch.get_revision(start) }
-    children = { revisions[start]: set() }
-    distances = { start: 0 }
-    colours = { start: 0 }
-    last_colour = 0
-
-    # Sort the revisions; the fastest way to do this is to visit each node
-    # as few times as possible (by keeping the todo list in a set) and record
-    # the largest distance to it before queuing up the children if we
-    # increased the distance.  This produces the sort order we desire
-    todo = set([ start ])
-    while todo:
-        revid = todo.pop()
-        revision = revisions[revid]
-        distance = distances[revid] + 1
-
-        found_same = False
+class DistanceMethod(object):
+
+    def __init__(self, branch, start):
+        self.branch = branch
+        self.start = start
+        self.revisions = {}
+        self.children = {}
+        self.children_of_id = {start: set()}
+        self.parent_ids_of = {}
+        self.colours = { start: 0 }
+        self.last_colour = 0
+        self.direct_parent_of = {}
+
+    def get_revision(self, revid):
+        """Retrieve a revision from the cache or the branch."""
+        try:
+            revision = self.revisions[revid]
+        except KeyError:
+            try:
+                revision = self.branch.get_revision(revid)
+            except NoSuchRevision:
+                revision = DummyRevision(revid)
+            self.set_caches(revid, revision)
+        return revision
+
+    def set_caches(self, revid, revision):
+        "Set the caches for a newly retrieved revision."""
+        # Build a revision cache
+        self.revisions[revid] = revision
+        # Build a children dictionnary
         for parent_id in revision.parent_ids:
-            # Get the parent from the cache, or put it in the cache
-            try:
-                parent = revisions[parent_id]
-            except KeyError:
-                try:
-                    parent = branch.get_revision(parent_id)
-                except NoSuchRevision:
-                    parent = DummyRevision(parent_id)
-                revisions[parent_id] = parent
-            children.setdefault(parent, set()).add(revision)
-
-            # Check whether there's any point re-processing this
-            if parent_id in distances and distances[parent_id] >= distance:
-                continue
-
-            distances[parent_id] = distance
-            todo.add(parent_id)
-
-    # Topologically sorted revids, with the most recent revisions first
-    sorted_revids = sorted(distances, key=distances.get)
-
-    # Build a parents dictionnary, where redundant parents will be removed, and
-    # that will be passed along tothe rest of program.
-    parent_ids_of = {}
-    for revision in revisions.itervalues():
+            self.children_of_id.setdefault(parent_id, set()).add(revision)
+        # Build a parents dictionnary, where redundant parents will be removed,
+        # and that will be passed along tothe rest of program.
         if len(revision.parent_ids) == len(set(revision.parent_ids)):
-            parent_ids_of[revision] = list(revision.parent_ids)
+            self.parent_ids_of[revision] = list(revision.parent_ids)
         else:
-            # remove duplicate parent revisions
+            # Remove duplicate parents
             parent_ids = []
             parent_ids_set = set()
             for parent_id in revision.parent_ids:
@@ -94,134 +75,208 @@
                     continue
                 parent_ids.append(parent_id)
                 parent_ids_set.add(parent_id)
-            parent_ids_of[revision] = parent_ids
-
-    # Count the number of children of each revision, so we can release memory
-    # for ancestry data as soon as it's not going to be needed anymore.
-    pending_count_of = {}
-    for parent, the_children in children.iteritems():
-        pending_count_of[parent.revision_id] = len(the_children)
-
-    # Build the ancestry dictionnary by examining older revisions first, and
-    # remove revision parents that are ancestors of other parents of the same
-    # revision.
-    ancestor_ids_of = {}
-    for revid in reversed(sorted_revids):
-        revision = revisions[revid]
-        parent_ids = parent_ids_of[revision]
-        # ignore candidate parents which are an ancestor of another parent, but
-        # never ignore the leftmost parent
-        redundant_ids = []
-        ignorable_parent_ids = parent_ids[1:] # never ignore leftmost
-        for candidate_id in ignorable_parent_ids: 
-            for parent_id in list(parent_ids):
-                if candidate_id in ancestor_ids_of[parent_id]:
-                    redundant_ids.append(candidate_id)
-                    parent_ids.remove(candidate_id)
-                    children_of_candidate = children[revisions[candidate_id]]
-                    children_of_candidate.remove(revision)
-                    break
-        # save the set of ancestors of that revision
-        ancestor_ids = set(parent_ids)
-        for parent_id in parent_ids:
-            ancestor_ids.update(ancestor_ids_of[parent_id])
-        ancestor_ids_of[revid] = ancestor_ids
-        # discard ancestry data for revisions whose children are already done
-        for parent_id in parent_ids + redundant_ids:
-            pending_count = pending_count_of[parent_id] - 1
-            pending_count_of[parent_id] = pending_count
-            if pending_count == 0:
-                ancestor_ids_of[parent_id] = None
-
-    # Try to compact sequences of revisions on the same branch.
-    direct_parent_of = {}
-    distances = {}
-    skipped_revids = []
-    expected_id = sorted_revids[0]
-    pending_ids = []
-    while True:
-        revid = sorted_revids.pop(0)
-        if revid != expected_id:
-            skipped_revids.append(revid)
-            continue
-        revision = revisions[revid]
-        for child in children[revision]:
-            # postpone if any child is missing
-            if child.revision_id not in distances:
-                if expected_id not in pending_ids:
-                    pending_ids.append(expected_id)
-                assert len(pending_ids) > 1
+            self.parent_ids_of[revision] = parent_ids
+
+    def make_children_map(self):
+        revisions = self.revisions
+        return dict((revisions[revid], c)
+                    for (revid, c) in self.children_of_id.iteritems())
+
+    def first_ancestry_traversal(self):
+        # Sort the revisions; the fastest way to do this is to visit each node
+        # as few times as possible (by keeping the todo list in a set) and
+        # record the largest distance to it before queuing up the children if
+        # we increased the distance. This produces the sort order we desire
+        distances = { self.start: 0 }
+        todo = set([self.start])
+        while todo:
+            revid = todo.pop()
+            revision = self.get_revision(revid)
+            distance = distances[revid] + 1
+            for parent_id in revision.parent_ids:
+                if parent_id in distances and distances[parent_id] >= distance:
+                    continue
+                distances[parent_id] = distance
+                todo.add(parent_id)
+        # Topologically sorted revids, with the most recent revisions first.
+        # A revision occurs only after all of its children.
+        return sorted(distances, key=distances.get)
+
+    def remove_redundant_parents(self, sorted_revids):
+        children_of_id = self.children_of_id
+        revisions = self.revisions
+        parent_ids_of = self.parent_ids_of
+        
+        # Count the number of children of each revision, so we can release
+        # memory for ancestry data as soon as it's not going to be needed
+        # anymore.
+        pending_count_of = {}
+        for parent_id, children in children_of_id.iteritems():
+            pending_count_of[parent_id] = len(children)
+
+        # Build the ancestry dictionnary by examining older revisions first,
+        # and remove revision parents that are ancestors of other parents of
+        # the same revision.
+        ancestor_ids_of = {}
+        for revid in reversed(sorted_revids):
+            revision = revisions[revid]
+            parent_ids = parent_ids_of[revision]
+            # ignore candidate parents which are an ancestor of another parent,
+            # but never ignore the leftmost parent
+            redundant_ids = []
+            ignorable_parent_ids = parent_ids[1:] # never ignore leftmost
+            for candidate_id in ignorable_parent_ids: 
+                for parent_id in list(parent_ids):
+                    if candidate_id in ancestor_ids_of[parent_id]:
+                        redundant_ids.append(candidate_id)
+                        parent_ids.remove(candidate_id)
+                        children_of_candidate = children_of_id[candidate_id]
+                        children_of_candidate.remove(revision)
+                        break
+            # save the set of ancestors of that revision
+            ancestor_ids = set(parent_ids)
+            for parent_id in parent_ids:
+                ancestor_ids.update(ancestor_ids_of[parent_id])
+            ancestor_ids_of[revid] = ancestor_ids
+            # discard ancestry data for revisions whose children are already
+            # done
+            for parent_id in parent_ids + redundant_ids:
+                pending_count = pending_count_of[parent_id] - 1
+                pending_count_of[parent_id] = pending_count
+                if pending_count == 0:
+                    ancestor_ids_of[parent_id] = None
+
+    def sort_revisions_and_set_colours(self, sorted_revids):
+        revisions = self.revisions
+        parent_ids_of = self.parent_ids_of
+        children_of_id = self.children_of_id
+        # Try to compact sequences of revisions on the same branch.
+        distances = {}
+        skipped_revids = []
+        expected_id = sorted_revids[0]
+        pending_ids = []
+        while True:
+            revid = sorted_revids.pop(0)
+            if revid != expected_id:
+                skipped_revids.append(revid)
+                continue
+            revision = revisions[revid]
+            for child in children_of_id[revid]:
+                # postpone if any child is missing
+                if child.revision_id not in distances:
+                    if expected_id not in pending_ids:
+                        pending_ids.append(expected_id)
+                    assert len(pending_ids) > 1
+                    expected_id = pending_ids.pop(0)
+                    skipped_revids.append(revid)
+                    sorted_revids[:0] = skipped_revids
+                    skipped_revids = []
+                    break
+            else:
+                # all children are here, push!
+                distances[revid] = len(distances)
+                self.choose_colour(revision, distances)
+                # all parents will need to be pushed as soon as possible
+                for parent in parent_ids_of[revision]:
+                    if parent not in pending_ids:
+                        pending_ids.insert(0, parent)
+                if not pending_ids:
+                    break
                 expected_id = pending_ids.pop(0)
-                skipped_revids.append(revid)
-                sorted_revids[:0] = skipped_revids
-                skipped_revids = []
-                break
-        else:
-            # all children are here, push!
-            distances[revid] = len(distances)
-            # choose colour
-            the_children = children[revision]
-            if len(the_children) == 1:
-                [child] = the_children
-                if len(parent_ids_of[child]) == 1:
-                    # one-one relationship between parent and child, same
-                    # colour
-                    colours[revid] = colours[child.revision_id]
-                else:
-                    # one child with multiple parents, the first parent with
-                    # the same committer gets the colour
-                    direct_parent = direct_parent_of.get(child)
-                    if direct_parent is None:                        
-                        for parent_id in parent_ids_of[child]:
-                            parent_revision = revisions[parent_id]
-                            if parent_revision.committer == child.committer:
-                                direct_parent = parent_revision
-                                direct_parent_of[child] = direct_parent
-                                break
-                    if direct_parent == revision:
-                        colours[revid] = colours[child.revision_id]
-                    else:
-                        colours[revid] = last_colour = last_colour + 1
-            else:
-                # multiple children, get the colour of the last displayed child
-                # with the same committer which does not already had its colour
-                # taken
-                available = {}
-                for child in the_children:
-                    if child.committer != revision.committer:
-                        continue
-                    direct_parent = direct_parent_of.get(child)
-                    if direct_parent == revision:
-                        colours[revid] = colours[child.revision_id]
-                        break
-                    if direct_parent is None:
-                        available[child] = distances[child.revision_id]
-                else:
-                    if available:
-                        sorted_children = sorted(available, key=available.get)
-                        child = sorted_children[-1]
-                        direct_parent_of[child] = revision
-                        colours[revid] = colours[child.revision_id]
-                    else:
-                        # no candidate children is available, pick the next
-                        # colour
-                        colours[revid] = last_colour = last_colour + 1
-            # all parents will need to be pushed as soon as possible
-            for parent in parent_ids_of[revision]:
-                if parent not in pending_ids:
-                    pending_ids.insert(0, parent)
-            if not pending_ids:
-                break
-            expected_id = pending_ids.pop(0)
-            # if the next expected revid has already been skipped, requeue it
-            # and its potential ancestors.
-            if expected_id in skipped_revids:
-                pos = skipped_revids.index(expected_id)
-                sorted_revids[:0] = skipped_revids[pos:]
-                del skipped_revids[pos:]
-
-    sorted_revids = sorted(distances, key=distances.get)
-
+                # if the next expected revid has already been skipped, requeue
+                # it and its potential ancestors.
+                if expected_id in skipped_revids:
+                    pos = skipped_revids.index(expected_id)
+                    sorted_revids[:0] = skipped_revids[pos:]
+                    del skipped_revids[pos:]
+        return sorted(distances, key=distances.get)
+
+    def choose_colour(self, revision, distances):
+        revid = revision.revision_id
+        children_of_id = self.children_of_id
+        parent_ids_of = self.parent_ids_of
+        colours = self.colours
+        # choose colour
+        the_children = children_of_id[revid]
+        if len(the_children) == 1:
+            [child] = the_children
+            if len(parent_ids_of[child]) == 1:
+                # one-one relationship between parent and child, same
+                # colour
+                colours[revid] = colours[child.revision_id]
+            else:
+                self.choose_colour_one_child(revision, child)
+        else:
+            self.choose_colour_many_children(revision, the_children, distances)
+
+    def choose_colour_one_child(self, revision, child):
+        revid = revision.revision_id
+        direct_parent_of = self.direct_parent_of
+        revisions = self.revisions
+        # one child with multiple parents, the first parent with
+        # the same committer gets the colour
+        direct_parent = direct_parent_of.get(child)
+        if direct_parent is None:
+            # if it has not been found yet, find it now and remember
+            for parent_id in self.parent_ids_of[child]:
+                parent_revision = revisions[parent_id]
+                if parent_revision.committer == child.committer:
+                    # found the first parent with the same committer
+                    direct_parent = parent_revision
+                    direct_parent_of[child] = direct_parent
+                    break
+        if direct_parent == revision:
+            self.colours[revid] = self.colours[child.revision_id]
+        else:
+            self.colours[revid] = self.last_colour = self.last_colour + 1
+
+    def choose_colour_many_children(self, revision, the_children, distances):
+        revid = revision.revision_id
+        direct_parent_of = self.direct_parent_of
+        # multiple children, get the colour of the last displayed child
+        # with the same committer which does not already have its colour
+        # taken
+        available = {}
+        for child in the_children:
+            if child.committer != revision.committer:
+                continue
+            direct_parent = direct_parent_of.get(child)
+            if direct_parent == revision:
+                self.colours[revid] = self.colours[child.revision_id]
+                break
+            if direct_parent is None:
+                available[child] = distances[child.revision_id]
+        else:
+            if available:
+                sorted_children = sorted(available, key=available.get)
+                child = sorted_children[-1]
+                direct_parent_of[child] = revision
+                self.colours[revid] = self.colours[child.revision_id]
+            else:
+                # no candidate children is available, pick the next
+                # colour
+                self.colours[revid] = self.last_colour = self.last_colour + 1
+
+
+def distances(branch, start):
+    """Sort the revisions.
+
+    Traverses the branch revision tree starting at start and produces an
+    ordered list of revisions such that a revision always comes after
+    any revision it is the parent of.
+
+    Returns a tuple of (revids, revisions, colours, children)
+    """
+    distance_method = DistanceMethod(branch, start)
+    sorted_revids = distance_method.first_ancestry_traversal()
+    distance_method.remove_redundant_parents(sorted_revids)
+    sorted_revids = \
+        distance_method.sort_revisions_and_set_colours(sorted_revids)
+
+    revisions = distance_method.revisions
+    colours = distance_method.colours
+    children = distance_method.make_children_map()
+    parent_ids_of = distance_method.parent_ids_of
     return (sorted_revids, revisions, colours, children, parent_ids_of)
 
 def graph(revids, revisions, colours, parent_ids):
@@ -283,6 +338,7 @@
 
         yield (revisions[revid], node, lines)
 
+
 def same_branch(a, b):
     """Return whether we think revisions a and b are on the same branch."""
     if len(a.parent_ids) == 1:



More information about the Pkg-bazaar-commits mailing list