[Reproducible-commits] [diffoscope] 03/11: Make the tlsh module optional

Jérémy Bobbio lunar at moszumanska.debian.org
Sat Nov 7 16:29:01 UTC 2015


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch master
in repository diffoscope.

commit 76b2d063f2983da31fe8751fff110a59d8169835
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Sat Nov 7 15:57:21 2015 +0100

    Make the tlsh module optional
    
    tlsh is currently not available on PyPI. So let's make it optional
    so we can eventually make diffoscope available there.
---
 diffoscope/__main__.py             |  8 ++++++++
 diffoscope/comparators/__init__.py |  7 +++++--
 diffoscope/comparators/binary.py   | 36 ++++++++++++++++++++----------------
 tests/comparators/test_utils.py    |  9 +++++++++
 4 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/diffoscope/__main__.py b/diffoscope/__main__.py
index 5048c36..ecb784c 100644
--- a/diffoscope/__main__.py
+++ b/diffoscope/__main__.py
@@ -26,6 +26,10 @@ import os
 import signal
 import sys
 import traceback
+try:
+    import tlsh
+except ImportError:
+    tlsh = None
 from diffoscope import logger, VERSION, set_locale
 import diffoscope.comparators
 from diffoscope.config import Config
@@ -73,6 +77,8 @@ def create_parser():
                         help='link to an extra CSS for the HTML report')
     parser.add_argument('file1', help='first file to compare')
     parser.add_argument('file2', help='second file to compare')
+    if not tlsh:
+        parser.epilog = 'File renaming detection based on fuzzy-matching is currently disabled. It can be enabled by installing the “tlsh” module available at https://github.com/trendmicro/tlsh'
     return parser
 
 
@@ -102,6 +108,8 @@ class ListToolsAction(argparse.Action):
 
 
 def run_diffoscope(parsed_args):
+    if not tlsh and Config.general.fuzzy_threshold != parsed_args.fuzzy_threshold:
+        logger.warning('Fuzzy-matching is currently disabled as the “tlsh” module is unavailable.')
     Config.general.max_diff_block_lines = parsed_args.max_diff_block_lines
     Config.general.max_diff_input_lines = parsed_args.max_diff_input_lines
     Config.general.max_report_size = parsed_args.max_report_size
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index 46d1481..4f55528 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -23,7 +23,10 @@ import operator
 import os.path
 import re
 import sys
-import tlsh
+try:
+    import tlsh
+except ImportError:
+    tlsh = None
 from diffoscope import logger, tool_required
 from diffoscope.config import Config
 from diffoscope.difference import Difference
@@ -158,7 +161,7 @@ def specialize(file):
 
 
 def perform_fuzzy_matching(members1, members2):
-    if Config.general.fuzzy_threshold == 0:
+    if tlsh == None or Config.general.fuzzy_threshold == 0:
         return
     already_compared = set()
     # Perform local copies because they will be modified by consumer
diff --git a/diffoscope/comparators/binary.py b/diffoscope/comparators/binary.py
index 1f14e79..38ca2f6 100644
--- a/diffoscope/comparators/binary.py
+++ b/diffoscope/comparators/binary.py
@@ -28,7 +28,10 @@ import re
 from stat import S_ISCHR, S_ISBLK
 import subprocess
 import tempfile
-import tlsh
+try:
+    import tlsh
+except ImportError:
+    tlsh = None
 import magic
 from diffoscope.config import Config
 from diffoscope.difference import Difference
@@ -112,21 +115,22 @@ class File(object, metaclass=ABCMeta):
                 self._magic_file_type = File.guess_file_type(self.path)
         return self._magic_file_type
 
-    @property
-    def fuzzy_hash(self):
-        if not hasattr(self, '_fuzzy_hash'):
-            with self.get_content():
-                # tlsh is not meaningful with files smaller than 512 bytes
-                if os.stat(self.path).st_size >= 512:
-                    h = tlsh.Tlsh()
-                    with open(self.path, 'rb') as f:
-                        for buf in iter(lambda: f.read(32768), b''):
-                            h.update(buf)
-                    h.final()
-                    self._fuzzy_hash = h.hexdigest()
-                else:
-                    self._fuzzy_hash = None
-        return self._fuzzy_hash
+    if tlsh:
+        @property
+        def fuzzy_hash(self):
+            if not hasattr(self, '_fuzzy_hash'):
+                with self.get_content():
+                    # tlsh is not meaningful with files smaller than 512 bytes
+                    if os.stat(self.path).st_size >= 512:
+                        h = tlsh.Tlsh()
+                        with open(self.path, 'rb') as f:
+                            for buf in iter(lambda: f.read(32768), b''):
+                                h.update(buf)
+                        h.final()
+                        self._fuzzy_hash = h.hexdigest()
+                    else:
+                        self._fuzzy_hash = None
+            return self._fuzzy_hash
 
     @abstractmethod
     @contextmanager
diff --git a/tests/comparators/test_utils.py b/tests/comparators/test_utils.py
index 1957e71..7aa3326 100644
--- a/tests/comparators/test_utils.py
+++ b/tests/comparators/test_utils.py
@@ -20,6 +20,11 @@
 import codecs
 import os.path
 import pytest
+try:
+    import tlsh
+    miss_tlsh = False
+except ImportError:
+    miss_tlsh = True
 from diffoscope.comparators import specialize
 from diffoscope.comparators.binary import FilesystemFile, NonExistingFile
 from diffoscope.comparators.utils import Command
@@ -39,6 +44,7 @@ def fuzzy_tar2():
 def fuzzy_tar3():
     return specialize(FilesystemFile(os.path.join(os.path.dirname(__file__), '../data/fuzzy3.tar')))
 
+@pytest.mark.skipif(miss_tlsh, reason='tlsh is missing')
 def test_fuzzy_matching(fuzzy_tar1, fuzzy_tar2):
     differences = fuzzy_tar1.compare(fuzzy_tar2).details
     expected_diff = codecs.open(os.path.join(os.path.dirname(__file__), '../data/text_iso8859_expected_diff'), encoding='utf-8').read()
@@ -47,6 +53,7 @@ def test_fuzzy_matching(fuzzy_tar1, fuzzy_tar2):
     assert 'similar' in differences[1].comment
     assert differences[1].unified_diff == expected_diff
 
+@pytest.mark.skipif(miss_tlsh, reason='tlsh is missing')
 def test_fuzzy_matching_only_once(fuzzy_tar1, fuzzy_tar3):
     differences = fuzzy_tar1.compare(fuzzy_tar3).details
     assert len(differences) == 2
@@ -60,12 +67,14 @@ def fuzzy_tar_in_tar1():
 def fuzzy_tar_in_tar2():
     return specialize(FilesystemFile(os.path.join(os.path.dirname(__file__), '../data/fuzzy-tar-in-tar2.tar')))
 
+@pytest.mark.skipif(miss_tlsh, reason='tlsh is missing')
 def test_no_fuzzy_matching(monkeypatch, fuzzy_tar_in_tar1, fuzzy_tar_in_tar2):
     monkeypatch.setattr(Config, 'fuzzy_threshold', 0)
     difference = fuzzy_tar_in_tar1.compare(fuzzy_tar_in_tar2)
     assert len(difference.details) == 1
     assert difference.details[0].source1 == 'tar --full-time -tvf {}'
 
+@pytest.mark.skipif(miss_tlsh, reason='tlsh is missing')
 def test_no_fuzzy_matching_new_file(monkeypatch, fuzzy_tar_in_tar1, fuzzy_tar_in_tar2):
     monkeypatch.setattr(Config, 'fuzzy_threshold', 0)
     monkeypatch.setattr(Config, 'new_file', True)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git



More information about the Reproducible-commits mailing list