[Reproducible-commits] [diffoscope] 03/11: Make the tlsh module optional
Jérémy Bobbio
lunar at moszumanska.debian.org
Sat Nov 7 16:29:01 UTC 2015
This is an automated email from the git hooks/post-receive script.
lunar pushed a commit to branch master
in repository diffoscope.
commit 76b2d063f2983da31fe8751fff110a59d8169835
Author: Jérémy Bobbio <lunar at debian.org>
Date: Sat Nov 7 15:57:21 2015 +0100
Make the tlsh module optional
tlsh is currently not available on PyPI. So let's make it optional
so we can eventually make diffoscope available there.
---
diffoscope/__main__.py | 8 ++++++++
diffoscope/comparators/__init__.py | 7 +++++--
diffoscope/comparators/binary.py | 36 ++++++++++++++++++++----------------
tests/comparators/test_utils.py | 9 +++++++++
4 files changed, 42 insertions(+), 18 deletions(-)
diff --git a/diffoscope/__main__.py b/diffoscope/__main__.py
index 5048c36..ecb784c 100644
--- a/diffoscope/__main__.py
+++ b/diffoscope/__main__.py
@@ -26,6 +26,10 @@ import os
import signal
import sys
import traceback
+try:
+ import tlsh
+except ImportError:
+ tlsh = None
from diffoscope import logger, VERSION, set_locale
import diffoscope.comparators
from diffoscope.config import Config
@@ -73,6 +77,8 @@ def create_parser():
help='link to an extra CSS for the HTML report')
parser.add_argument('file1', help='first file to compare')
parser.add_argument('file2', help='second file to compare')
+ if not tlsh:
+ parser.epilog = 'File renaming detection based on fuzzy-matching is currently disabled. It can be enabled by installing the “tlsh” module available at https://github.com/trendmicro/tlsh'
return parser
@@ -102,6 +108,8 @@ class ListToolsAction(argparse.Action):
def run_diffoscope(parsed_args):
+ if not tlsh and Config.general.fuzzy_threshold != parsed_args.fuzzy_threshold:
+ logger.warning('Fuzzy-matching is currently disabled as the “tlsh” module is unavailable.')
Config.general.max_diff_block_lines = parsed_args.max_diff_block_lines
Config.general.max_diff_input_lines = parsed_args.max_diff_input_lines
Config.general.max_report_size = parsed_args.max_report_size
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index 46d1481..4f55528 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -23,7 +23,10 @@ import operator
import os.path
import re
import sys
-import tlsh
+try:
+ import tlsh
+except ImportError:
+ tlsh = None
from diffoscope import logger, tool_required
from diffoscope.config import Config
from diffoscope.difference import Difference
@@ -158,7 +161,7 @@ def specialize(file):
def perform_fuzzy_matching(members1, members2):
- if Config.general.fuzzy_threshold == 0:
+ if tlsh == None or Config.general.fuzzy_threshold == 0:
return
already_compared = set()
# Perform local copies because they will be modified by consumer
diff --git a/diffoscope/comparators/binary.py b/diffoscope/comparators/binary.py
index 1f14e79..38ca2f6 100644
--- a/diffoscope/comparators/binary.py
+++ b/diffoscope/comparators/binary.py
@@ -28,7 +28,10 @@ import re
from stat import S_ISCHR, S_ISBLK
import subprocess
import tempfile
-import tlsh
+try:
+ import tlsh
+except ImportError:
+ tlsh = None
import magic
from diffoscope.config import Config
from diffoscope.difference import Difference
@@ -112,21 +115,22 @@ class File(object, metaclass=ABCMeta):
self._magic_file_type = File.guess_file_type(self.path)
return self._magic_file_type
- @property
- def fuzzy_hash(self):
- if not hasattr(self, '_fuzzy_hash'):
- with self.get_content():
- # tlsh is not meaningful with files smaller than 512 bytes
- if os.stat(self.path).st_size >= 512:
- h = tlsh.Tlsh()
- with open(self.path, 'rb') as f:
- for buf in iter(lambda: f.read(32768), b''):
- h.update(buf)
- h.final()
- self._fuzzy_hash = h.hexdigest()
- else:
- self._fuzzy_hash = None
- return self._fuzzy_hash
+ if tlsh:
+ @property
+ def fuzzy_hash(self):
+ if not hasattr(self, '_fuzzy_hash'):
+ with self.get_content():
+ # tlsh is not meaningful with files smaller than 512 bytes
+ if os.stat(self.path).st_size >= 512:
+ h = tlsh.Tlsh()
+ with open(self.path, 'rb') as f:
+ for buf in iter(lambda: f.read(32768), b''):
+ h.update(buf)
+ h.final()
+ self._fuzzy_hash = h.hexdigest()
+ else:
+ self._fuzzy_hash = None
+ return self._fuzzy_hash
@abstractmethod
@contextmanager
diff --git a/tests/comparators/test_utils.py b/tests/comparators/test_utils.py
index 1957e71..7aa3326 100644
--- a/tests/comparators/test_utils.py
+++ b/tests/comparators/test_utils.py
@@ -20,6 +20,11 @@
import codecs
import os.path
import pytest
+try:
+ import tlsh
+ miss_tlsh = False
+except ImportError:
+ miss_tlsh = True
from diffoscope.comparators import specialize
from diffoscope.comparators.binary import FilesystemFile, NonExistingFile
from diffoscope.comparators.utils import Command
@@ -39,6 +44,7 @@ def fuzzy_tar2():
def fuzzy_tar3():
return specialize(FilesystemFile(os.path.join(os.path.dirname(__file__), '../data/fuzzy3.tar')))
+@pytest.mark.skipif(miss_tlsh, reason='tlsh is missing')
def test_fuzzy_matching(fuzzy_tar1, fuzzy_tar2):
differences = fuzzy_tar1.compare(fuzzy_tar2).details
expected_diff = codecs.open(os.path.join(os.path.dirname(__file__), '../data/text_iso8859_expected_diff'), encoding='utf-8').read()
@@ -47,6 +53,7 @@ def test_fuzzy_matching(fuzzy_tar1, fuzzy_tar2):
assert 'similar' in differences[1].comment
assert differences[1].unified_diff == expected_diff
+@pytest.mark.skipif(miss_tlsh, reason='tlsh is missing')
def test_fuzzy_matching_only_once(fuzzy_tar1, fuzzy_tar3):
differences = fuzzy_tar1.compare(fuzzy_tar3).details
assert len(differences) == 2
@@ -60,12 +67,14 @@ def fuzzy_tar_in_tar1():
def fuzzy_tar_in_tar2():
return specialize(FilesystemFile(os.path.join(os.path.dirname(__file__), '../data/fuzzy-tar-in-tar2.tar')))
+@pytest.mark.skipif(miss_tlsh, reason='tlsh is missing')
def test_no_fuzzy_matching(monkeypatch, fuzzy_tar_in_tar1, fuzzy_tar_in_tar2):
monkeypatch.setattr(Config, 'fuzzy_threshold', 0)
difference = fuzzy_tar_in_tar1.compare(fuzzy_tar_in_tar2)
assert len(difference.details) == 1
assert difference.details[0].source1 == 'tar --full-time -tvf {}'
+@pytest.mark.skipif(miss_tlsh, reason='tlsh is missing')
def test_no_fuzzy_matching_new_file(monkeypatch, fuzzy_tar_in_tar1, fuzzy_tar_in_tar2):
monkeypatch.setattr(Config, 'fuzzy_threshold', 0)
monkeypatch.setattr(Config, 'new_file', True)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/diffoscope.git
More information about the Reproducible-commits
mailing list