[Reproducible-commits] [debbindiff] 06/08: Perform comparison with best fuzzy matching instead of first above threshold

Jérémy Bobbio lunar at moszumanska.debian.org
Fri Jul 31 08:54:51 UTC 2015


This is an automated email from the git hooks/post-receive script.

lunar pushed a commit to branch master
in repository debbindiff.

commit c342857c91aaad5e1ee643948c6f55e206e970b9
Author: Jérémy Bobbio <lunar at debian.org>
Date:   Fri Jul 31 08:36:15 2015 +0000

    Perform comparison with best fuzzy matching instead of first above threshold
---
 debbindiff/comparators/__init__.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/debbindiff/comparators/__init__.py b/debbindiff/comparators/__init__.py
index b35e309..cf28f82 100644
--- a/debbindiff/comparators/__init__.py
+++ b/debbindiff/comparators/__init__.py
@@ -19,6 +19,7 @@
 # along with debbindiff.  If not, see <http://www.gnu.org/licenses/>.
 
 import magic
+import operator
 import os.path
 import re
 import sys
@@ -129,12 +130,16 @@ fuzzy_threshold = 85
 
 
 def perform_fuzzy_matching(files1, files2):
+    files2 = set(files2)
     already_compared = set()
     for file1 in filter(lambda f: not f.is_directory(), files1):
-        for file2 in filter(lambda f: not f.is_directory(), files2):
-            similarity = ssdeep.compare(file1.fuzzy_hash, file2.fuzzy_hash)
-            logger.debug('fuzzy matching %s %s: %d', file1.name, file2.name, similarity)
+        comparisons = [(ssdeep.compare(file1.fuzzy_hash, file2.fuzzy_hash), file2)
+                       for file2 in files2 - already_compared
+                       if not file2.is_directory()]
+        if comparisons:
+            comparisons.sort(key=operator.itemgetter(0))
+            similarity, file2 = comparisons[-1]
+            logger.debug('fuzzy top  match %s %s: %d', file1.name, file2.name, similarity)
             if similarity >= fuzzy_threshold:
                 yield file1, file2, similarity
                 already_compared.add(file2)
-                break

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/debbindiff.git



More information about the Reproducible-commits mailing list