[Pkg-bazaar-commits] ./bzr/unstable r938: - various optimizations to weave add code
Martin Pool
mbp at sourcefrog.net
Fri Apr 10 08:21:31 UTC 2009
------------------------------------------------------------
revno: 938
committer: Martin Pool <mbp at sourcefrog.net>
timestamp: Mon 2005-07-18 08:34:12 -0300
message:
- various optimizations to weave add code
modified:
bzrlib/weave.py
-------------- next part --------------
=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py 2005-07-17 18:40:23 +0000
+++ b/bzrlib/weave.py 2005-07-18 11:34:12 +0000
@@ -28,6 +28,26 @@
# making _extract build and return a list, rather than being a generator
# takes 37.94s
+# with python -O, r923 does 2000 versions in 36.87s
+
+# with optimizations to avoid mutating lists - 35.75! I guess copying
+# all the elements every time costs more than the small manipulations.
+# a surprisingly small change.
+
+# r931, which avoids using a generator for extract, does 36.98s
+
+# with memoized inclusions, takes 41.49s; not very good
+
+# with slots, takes 37.35s; without takes 39.16, a bit surprising
+
+# with the delta calculation mixed in with the add method, rather than
+# separated, takes 36.78s
+
+# with delta folded in and mutation of the list, 36.13s
+
+# with all this and simplification of add code, 33s
+
+
# TODO: Perhaps have copy method for Weave instances?
# XXX: If we do weaves this way, will a merge still behave the same
@@ -148,6 +168,9 @@
_sha1s
List of hex SHA-1 of each version, or None if not recorded.
"""
+
+ ## __slots__ = ['_l', '_v', '_sha1s']
+
def __init__(self):
self._l = []
self._v = []
@@ -175,66 +198,105 @@
text
Sequence of lines to be added in the new version."""
- ## self._check_versions(parents)
+
+ self._check_versions(parents)
## self._check_lines(text)
- idx = len(self._v)
+ new_version = len(self._v)
import sha
s = sha.new()
- for l in text:
- s.update(l)
+ map(s.update, text)
sha1 = s.hexdigest()
del s
- # TODO: It'd probably be faster to append things on to a new
- # list rather than modifying the existing one, which is likely
- # to cause a lot of copying.
-
- if parents:
- ancestors = self.inclusions(parents)
- delta = self._delta(ancestors, text)
-
- # offset gives the number of lines that have been inserted
- # into the weave up to the current point; if the original edit instruction
- # says to change line A then we actually change (A+offset)
- offset = 0
-
- for i1, i2, newlines in delta:
- assert 0 <= i1
- assert i1 <= i2
- assert i2 <= len(self._l)
-
- # the deletion and insertion are handled separately.
- # first delete the region.
- if i1 != i2:
- self._l.insert(i1+offset, ('[', idx))
- self._l.insert(i2+offset+1, (']', idx))
- offset += 2
- # is this OK???
-
- if newlines:
- # there may have been a deletion spanning up to
- # i2; we want to insert after this region to make sure
- # we don't destroy ourselves
- i = i2 + offset
- self._l[i:i] = [('{', idx)] \
- + newlines \
- + [('}', idx)]
- offset += 2 + len(newlines)
-
- self._addversion(parents)
- else:
- # special case; adding with no parents revision; can do this
- # more quickly by just appending unconditionally
- self._l.append(('{', idx))
- self._l += text
- self._l.append(('}', idx))
-
- self._addversion(None)
-
+ # if we abort after here the weave will be corrupt
+ self._addversion(parents)
self._sha1s.append(sha1)
-
- return idx
+
+
+ if not parents:
+ # special case; adding with no parents revision; can do
+ # this more quickly by just appending unconditionally.
+ # even more specially, if we're adding an empty text we
+ # need do nothing at all.
+ if text:
+ self._l.append(('{', new_version))
+ self._l.extend(text)
+ self._l.append(('}', new_version))
+
+ return new_version
+
+ if len(parents) == 1 and sha1 == self._sha1s[parents[0]]:
+ # special case: same as the single parent
+ return new_version
+
+
+ ancestors = self.inclusions(parents)
+
+ l = self._l
+
+ # basis is a list of (origin, lineno, line)
+ basis_lineno = []
+ basis_lines = []
+ for origin, lineno, line in self._extract(ancestors):
+ basis_lineno.append(lineno)
+ basis_lines.append(line)
+
+ # another small special case: a merge, producing the same text as auto-merge
+ if text == basis_lines:
+ return new_version
+
+ # add a sentinel, because we can also match against the final line
+ basis_lineno.append(len(self._l))
+
+ # XXX: which line of the weave should we really consider
+ # matches the end of the file? the current code says it's the
+ # last line of the weave?
+
+ #print 'basis_lines:', basis_lines
+ #print 'new_lines: ', lines
+
+ from difflib import SequenceMatcher
+ s = SequenceMatcher(None, basis_lines, text)
+
+ # offset gives the number of lines that have been inserted
+ # into the weave up to the current point; if the original edit instruction
+ # says to change line A then we actually change (A+offset)
+ offset = 0
+
+ for tag, i1, i2, j1, j2 in s.get_opcodes():
+ # i1,i2 are given in offsets within basis_lines; we need to map them
+ # back to offsets within the entire weave
+ #print 'raw match', tag, i1, i2, j1, j2
+ if tag == 'equal':
+ continue
+
+ i1 = basis_lineno[i1]
+ i2 = basis_lineno[i2]
+
+ assert 0 <= i1 <= i2 <= len(old_l)
+ assert 0 <= j1 <= j2 <= len(text)
+
+ #print tag, i1, i2, j1, j2
+
+ # the deletion and insertion are handled separately.
+ # first delete the region.
+ if i1 != i2:
+ self._l.insert(i1+offset, ('[', new_version))
+ self._l.insert(i2+offset+1, (']', new_version))
+ offset += 2
+
+ if j1 != j2:
+ # there may have been a deletion spanning up to
+ # i2; we want to insert after this region to make sure
+ # we don't destroy ourselves
+ i = i2 + offset
+ self._l[i:i] = ([('{', new_version)]
+ + text[j1:j2]
+ + [('}', new_version)])
+ offset += 2 + (j2 - j1)
+
+ return new_version
def inclusions(self, versions):
@@ -510,41 +572,6 @@
If line1=line2, this is a pure insert; if newlines=[] this is a
pure delete. (Similar to difflib.)
"""
- # basis a list of (origin, lineno, line)
- basis_lineno = []
- basis_lines = []
- for origin, lineno, line in self._extract(included):
- basis_lineno.append(lineno)
- basis_lines.append(line)
-
- # add a sentinal, because we can also match against the final line
- basis_lineno.append(len(self._l))
-
- # XXX: which line of the weave should we really consider
- # matches the end of the file? the current code says it's the
- # last line of the weave?
-
- from difflib import SequenceMatcher
- s = SequenceMatcher(None, basis_lines, lines)
-
- # TODO: Perhaps return line numbers from composed weave as well?
-
- for tag, i1, i2, j1, j2 in s.get_opcodes():
- ##print tag, i1, i2, j1, j2
-
- if tag == 'equal':
- continue
-
- # i1,i2 are given in offsets within basis_lines; we need to map them
- # back to offsets within the entire weave
- real_i1 = basis_lineno[i1]
- real_i2 = basis_lineno[i2]
-
- assert 0 <= j1
- assert j1 <= j2
- assert j2 <= len(lines)
-
- yield real_i1, real_i2, lines[j1:j2]
@@ -788,6 +815,7 @@
pb = ProgressBar()
w.check(pb)
pb.clear()
+ print '%d versions ok' % w.numversions()
elif cmd == 'inclusions':
w = readit()
More information about the Pkg-bazaar-commits
mailing list