[Reproducible-commits] [debbindiff] 02/03: Reimplement are_same_binaries() using cmp

Jérémy Bobbio lunar at moszumanska.debian.org
Thu Apr 30 17:26:52 UTC 2015


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch master
in repository debbindiff.

commit 1a503b9f5cc91b33d5034fea6647129dafacea0e
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Thu Apr 30 19:25:14 2015 +0200

    Reimplement are_same_binaries() using cmp
    
    It should be a lot faster. Because it's a new required tool,
    we must shuffle a couple of things around.
---
 debbindiff/__init__.py             |  1 +
 debbindiff/comparators/__init__.py |  7 +++++--
 debbindiff/comparators/binary.py   |  6 ++++++
 debbindiff/comparators/deb.py      |  3 ++-
 debbindiff/comparators/utils.py    | 21 ++-------------------
 5 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/debbindiff/__init__.py b/debbindiff/__init__.py
index 570daa7..207b53d 100644
--- a/debbindiff/__init__.py
+++ b/debbindiff/__init__.py
@@ -34,6 +34,7 @@ ch.setFormatter(formatter)
 class RequiredToolNotFound(Exception):
     PROVIDERS = { 'ar':         { 'debian': 'binutils-multiarch' }
                 , 'bzip2':      { 'debian': 'bzip2' }
+                , 'cmp':        { 'debian': 'diffutils' }
                 , 'cpio':       { 'debian': 'cpio' }
                 , 'diff':       { 'debian': 'diffutils' }
                 , 'file':       { 'debian': 'file' }
diff --git a/debbindiff/comparators/__init__.py b/debbindiff/comparators/__init__.py
index e4ea496..aee39b6 100644
--- a/debbindiff/comparators/__init__.py
+++ b/debbindiff/comparators/__init__.py
@@ -22,8 +22,9 @@ import magic
 import os.path
 import re
 import sys
-from debbindiff import logger
-from debbindiff.comparators.binary import compare_binary_files
+from debbindiff import logger, tool_required
+from debbindiff.comparators.binary import \
+    compare_binary_files, are_same_binaries
 from debbindiff.comparators.bzip2 import compare_bzip2_files
 from debbindiff.comparators.changes import compare_changes_files
 from debbindiff.comparators.cpio import compare_cpio_files
@@ -55,6 +56,8 @@ def guess_mime_type(path):
 
 def compare_unknown(path1, path2, source=None):
     logger.debug("compare unknown path: %s and %s", path1, path2)
+    if are_same_binaries(path1, path2):
+        return []
     mime_type1 = guess_mime_type(path1)
     mime_type2 = guess_mime_type(path2)
     logger.debug("mime_type1: %s | mime_type2: %s", mime_type1, mime_type2)
diff --git a/debbindiff/comparators/binary.py b/debbindiff/comparators/binary.py
index 682ebb5..572762e 100644
--- a/debbindiff/comparators/binary.py
+++ b/debbindiff/comparators/binary.py
@@ -58,3 +58,9 @@ def compare_binary_files(path1, path2, source=None):
     if not difference:
         return []
     return [difference]
+
+
+@tool_required('cmp')
+def are_same_binaries(path1, path2):
+    return 0 == subprocess.call(['cmp', '--silent', path1, path2],
+                                shell=False, close_fds=True)
diff --git a/debbindiff/comparators/deb.py b/debbindiff/comparators/deb.py
index 9a9be51..6e46fe0 100644
--- a/debbindiff/comparators/deb.py
+++ b/debbindiff/comparators/deb.py
@@ -22,8 +22,9 @@ from debian.arfile import ArFile
 from debbindiff import logger
 from debbindiff.difference import Difference, get_source
 import debbindiff.comparators
+from debbindiff.comparators.binary import are_same_binaries
 from debbindiff.comparators.utils import \
-    binary_fallback, make_temp_directory, are_same_binaries, get_ar_content
+    binary_fallback, make_temp_directory, get_ar_content
 
 
 @binary_fallback
diff --git a/debbindiff/comparators/utils.py b/debbindiff/comparators/utils.py
index 18ad66f..66272af 100644
--- a/debbindiff/comparators/utils.py
+++ b/debbindiff/comparators/utils.py
@@ -27,29 +27,12 @@ import shutil
 import subprocess
 import tempfile
 from threading import Thread
-from debbindiff.comparators.binary import compare_binary_files
+from debbindiff.comparators.binary import \
+    compare_binary_files, are_same_binaries
 from debbindiff.difference import Difference
 from debbindiff import logger, RequiredToolNotFound
 
 
-def are_same_binaries(path1, path2):
-    BUF_SIZE = 20 * 2 ** 10  # 20 kB
-    h1 = hashlib.md5()
-    f1 = open(path1, 'rb')
-    h2 = hashlib.md5()
-    f2 = open(path2, 'rb')
-    while True:
-        buf1 = f1.read(BUF_SIZE)
-        buf2 = f2.read(BUF_SIZE)
-        if not buf1 or not buf2:
-            return not buf1 and not buf2
-        h1.update(buf1)
-        h2.update(buf2)
-        if h1.digest() != h2.digest():
-            return False
-    return True
-
-
 # decorator that will create a fallback on binary diff if no differences
 # are detected or if an external tool fails
 def binary_fallback(original_function):

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git



More information about the Reproducible-commits mailing list