[Reproducible-commits] [debbindiff] 03/19: Refactor how we run diff and feed its input
Jérémy Bobbio
lunar at moszumanska.debian.org
Tue Mar 31 14:59:28 UTC 2015
This is an automated email from the git hooks/post-receive script.
lunar pushed a commit to branch pu/feed-diff
in repository debbindiff.
commit 13743a5a33709cba3ed5cc973e9987ca38419e53
Author: Jérémy Bobbio <lunar at debian.org>
Date: Mon Mar 30 05:34:02 2015 +0200
Refactor how we run diff and feed its input
---
debbindiff/difference.py | 100 ++++++++++++++++++++++++++++++-----------------
1 file changed, 65 insertions(+), 35 deletions(-)
diff --git a/debbindiff/difference.py b/debbindiff/difference.py
index c30c8b6..06a5cb4 100644
--- a/debbindiff/difference.py
+++ b/debbindiff/difference.py
@@ -17,6 +17,7 @@
# You should have received a copy of the GNU General Public License
# along with debbindiff. If not, see <http://www.gnu.org/licenses/>.
+from contextlib import contextmanager
import os
import os.path
from functools import partial
@@ -24,6 +25,7 @@ from tempfile import NamedTemporaryFile
import re
import subprocess
from threading import Thread
+from multiprocessing import Queue
from debbindiff import logger, tool_required, RequiredToolNotFound
@@ -34,8 +36,10 @@ MAX_DIFF_LINES = 10000
class DiffParser(object):
RANGE_RE = re.compile(r'^@@\s+-(?P<start1>\d+)(,(?P<len1>\d+))?\s+\+(?P<start2>\d+)(,(?P<len2>\d+))?\s+@@$')
- def __init__(self, output):
+ def __init__(self, output, end_nl_q1, end_nl_q2):
self._output = output
+ self._end_nl_q1 = end_nl_q1
+ self._end_nl_q2 = end_nl_q2
self._action = self.read_headers
self._diff = ''
self._success = False
@@ -43,6 +47,7 @@ class DiffParser(object):
self._remaining_hunk_lines = None
self._block_len = None
self._direction = None
+ self._end_nl = None
@property
def diff(self):
@@ -90,7 +95,13 @@ class DiffParser(object):
elif line[0] == '-':
self._remaining_hunk_lines -= 1
elif line[0] == '\\':
- pass
+ # When both files don't end with \n, do not show it as a difference
+ if self._end_nl is None:
+ end_nl1 = self._end_nl_q1.get()
+ end_nl2 = self._end_nl_q2.get()
+ self._end_nl = end_nl1 and end_nl2
+ if not self._end_nl:
+ return self.read_hunk
elif self._remaining_hunk_lines == 0:
return self.read_headers(line)
else:
@@ -122,49 +133,28 @@ class DiffParser(object):
DIFF_CHUNK = 4096
-def feed_content(f, content, add_ln):
- for offset in range(0, len(content), DIFF_CHUNK):
- f.write(content[offset:offset + DIFF_CHUNK].encode('utf-8'))
- if add_ln:
- f.write('\n')
- f.close()
-
-
@tool_required('diff')
-def diff(content1, content2):
- pipe_r1, pipe_w1 = os.pipe()
- pipe_r2, pipe_w2 = os.pipe()
- # run diff
+def run_diff(fd1, fd2, end_nl_q1, end_nl_q2):
logger.debug('running diff')
- cmd = ['diff', '-au7', '/dev/fd/%d' % pipe_r1, '/dev/fd/%d' % pipe_r2]
- def close_pipes():
- os.close(pipe_w1)
- os.close(pipe_w2)
+ cmd = ['diff', '-au7', '/dev/fd/%d' % fd1, '/dev/fd/%d' % fd2]
+ def close_fds():
+ fds = [int(fd) for fd in os.listdir('/dev/fd')
+ if int(fd) not in (1, 2, fd1, fd2)]
+ for fd in fds:
+ try:
+ os.close(fd)
+ except OSError:
+ pass
p = subprocess.Popen(cmd, shell=False, bufsize=1,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
- preexec_fn=close_pipes)
- os.close(pipe_r1)
- os.close(pipe_r2)
+ preexec_fn=close_fds)
p.stdin.close()
- output1 = os.fdopen(pipe_w1, 'w')
- output2 = os.fdopen(pipe_w2, 'w')
- parser = DiffParser(p.stdout)
+ parser = DiffParser(p.stdout, end_nl_q1, end_nl_q2)
t_read = Thread(target=parser.parse)
t_read.daemon = True
t_read.start()
- # work-around unified diff limitation: if there's no newlines in both
- # don't make it a difference
- add_ln = content1[-1] != '\n' and content2[-1] != '\n'
- t_write1 = Thread(target=feed_content, args=(output1, content1, add_ln))
- t_write1.daemon = True
- t_write1.start()
- t_write2 = Thread(target=feed_content, args=(output2, content2, add_ln))
- t_write2.daemon = True
- t_write2.start()
- t_write1.join()
- t_write2.join()
t_read.join()
p.wait()
if not parser.success and p.returncode not in (0, 1):
@@ -174,6 +164,46 @@ def diff(content1, content2):
return parser.diff
+def feed(feeder, f, end_nl_q):
+ # work-around unified diff limitation: if there's no newlines in both
+ # don't make it a difference
+ try:
+ end_nl = feeder(f)
+ end_nl_q.put(end_nl)
+ finally:
+ f.close()
+
+
+@contextmanager
+def fd_from_feeder(feeder, end_nl_q):
+ pipe_r, pipe_w = os.pipe()
+ outf = os.fdopen(pipe_w, 'w')
+ t = Thread(target=feed, args=(feeder, outf, end_nl_q))
+ t.daemon = True
+ t.start()
+ yield pipe_r
+ t.join()
+ outf.close()
+
+
+def make_feeder_from_content(content):
+ def feeder(f):
+ for offset in range(0, len(content), DIFF_CHUNK):
+ f.write(content[offset:offset + DIFF_CHUNK].encode('utf-8'))
+ return content and content[-1] == '\n'
+ return feeder
+
+
+def diff(content1, content2):
+ end_nl_q1 = Queue()
+ end_nl_q2 = Queue()
+ feeder1 = make_feeder_from_content(content1)
+ feeder2 = make_feeder_from_content(content2)
+ with fd_from_feeder(feeder1, end_nl_q1) as fd1:
+ with fd_from_feeder(feeder2, end_nl_q2) as fd2:
+ return run_diff(fd1, fd2, end_nl_q1, end_nl_q2)
+
+
class Difference(object):
def __init__(self, unified_diff, path1, path2, source=None, comment=None):
self._comment = comment
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git
More information about the Reproducible-commits
mailing list