[Forensics-changes] [yara] 12/135: Fix bug in regular expression engine causing false negative matches

Hilko Bengen bengen at moszumanska.debian.org
Sat Jul 1 10:27:27 UTC 2017


This is an automated email from the git hooks/post-receive script.

bengen pushed a commit to annotated tag v3.1.0
in repository yara.

commit 08c8456a0728522b347e936f66b98676a890cd09
Author: Victor M. Alvarez <plusvic at gmail.com>
Date:   Fri Mar 28 16:44:49 2014 +0100

    Fix bug in regular expression engine causing false negative matches
    
    Affected regular expressions were those in which the chosen atom had a {n} or {n, m} quantifier with n > 1. Example: /.b{2}/ didn't match "abb".
---
 libyara/atoms.c      |  5 +++--
 libyara/re.c         | 32 ++++++++++++++++++++-----------
 libyara/rules.c      | 54 +++++++++++++++++++---------------------------------
 yara-python/tests.py |  1 +
 4 files changed, 45 insertions(+), 47 deletions(-)

diff --git a/libyara/atoms.c b/libyara/atoms.c
index f78b580..cb2bb97 100644
--- a/libyara/atoms.c
+++ b/libyara/atoms.c
@@ -728,13 +728,14 @@ ATOM_TREE_NODE* _yr_atoms_extract_from_re_node(
 
       append_current_leaf_to_node(current_node);
 
-      if (re_node->start > 0)
+      for (i = 0; i < re_node->start; i++)
       {
         current_node = _yr_atoms_extract_from_re_node(
             re_node->left, atom_tree, current_node);
+      }
 
+      if (re_node->start > 0)
         append_current_leaf_to_node(current_node);
-      }
 
       return current_node;
 
diff --git a/libyara/re.c b/libyara/re.c
index cb142bf..cd8b282 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -45,8 +45,10 @@ order to avoid confusion with operating system threads.
 #define RE_MAX_STACK    1024
 #define RE_SCAN_LIMIT   4096
 
-#define EMIT_FLAGS_BACKWARDS           1
-#define EMIT_FLAGS_DONT_ANNOTATE_RE    2
+#define EMIT_BACKWARDS                1
+#define DONT_UPDATE_FORWARDS_CODE     2
+#define DONT_UPDATE_BACKWARDS_CODE    4
+
 
 #ifndef min
 #define min(x, y)  ((x < y) ? (x) : (y))
@@ -653,7 +655,7 @@ int _yr_re_emit(
 
   case RE_NODE_CONCAT:
 
-    if (flags & EMIT_FLAGS_BACKWARDS)
+    if (flags & EMIT_BACKWARDS)
     {
       left = re_node->right;
       right = re_node->left;
@@ -842,10 +844,15 @@ int _yr_re_emit(
 
       for (i = 0; i < re_node->start - 1; i++)
       {
+        // We don't want re_node->forward_code updated in this call:
+        // forward_code must remain pointing to the code generated by
+        // the _yr_re_emit call above. However, we do want
+        // re_node->backward_code to be updated.
+
         FAIL_ON_ERROR(_yr_re_emit(
             re_node->left,
             arena,
-            flags | EMIT_FLAGS_DONT_ANNOTATE_RE,
+            flags | DONT_UPDATE_FORWARDS_CODE,
             NULL,
             &branch_size));
 
@@ -880,7 +887,7 @@ int _yr_re_emit(
     FAIL_ON_ERROR(_yr_re_emit(
         re_node->left,
         arena,
-        flags | EMIT_FLAGS_DONT_ANNOTATE_RE,
+        flags | DONT_UPDATE_FORWARDS_CODE | DONT_UPDATE_BACKWARDS_CODE,
         NULL,
         &branch_size));
 
@@ -907,11 +914,14 @@ int _yr_re_emit(
     break;
   }
 
-  if (!(flags & EMIT_FLAGS_DONT_ANNOTATE_RE))
+  if (flags & EMIT_BACKWARDS)
   {
-    if (flags & EMIT_FLAGS_BACKWARDS)
-      re_node->backward_code = instruction_addr;
-    else
+    if (!(flags & DONT_UPDATE_BACKWARDS_CODE))
+      re_node->backward_code = instruction_addr + *code_size;
+  }
+  else
+  {
+    if (!(flags & DONT_UPDATE_FORWARDS_CODE))
       re_node->forward_code = instruction_addr;
   }
 
@@ -946,7 +956,7 @@ int yr_re_emit_code(
   FAIL_ON_ERROR(_yr_re_emit(
       re->root_node,
       arena,
-      EMIT_FLAGS_BACKWARDS,
+      EMIT_BACKWARDS,
       NULL,
       &code_size));
 
@@ -1483,7 +1493,7 @@ int yr_re_exec(
 
         case RE_OPCODE_MATCH_AT_START:
           if (flags & RE_FLAGS_BACKWARDS)
-            kill = (input_size - 1 > count - character_size);
+            kill = (input_size > count);
           else
             kill = (count != 0);
           action = kill ? ACTION_KILL : ACTION_CONTINUE;
diff --git a/libyara/rules.c b/libyara/rules.c
index 3e850cb..67bd717 100644
--- a/libyara/rules.c
+++ b/libyara/rules.c
@@ -199,6 +199,23 @@ int _yr_scan_fast_hex_re_exec(
 
     while(!stop)
     {
+      if (*ip == RE_OPCODE_MATCH)
+      {
+        if (flags & RE_FLAGS_EXHAUSTIVE)
+        {
+            callback(
+               flags & RE_FLAGS_BACKWARDS ? current_input + 1 : input,
+               matches,
+               flags,
+               callback_args);
+            break;
+        }
+        else
+        {
+            return matches;
+        }
+      }
+
       if (flags & RE_FLAGS_BACKWARDS)
       {
         if (current_input <= input - input_size)
@@ -283,23 +300,6 @@ int _yr_scan_fast_hex_re_exec(
         default:
           assert(FALSE);
       }
-
-      if (*ip == RE_OPCODE_MATCH)
-      {
-        if (flags & RE_FLAGS_EXHAUSTIVE)
-        {
-          callback(
-            flags & RE_FLAGS_BACKWARDS ? current_input + 1 : input,
-            matches,
-            flags,
-            callback_args);
-          stop = TRUE;
-        }
-        else
-        {
-          return matches;
-        }
-      }
     }
   }
 
@@ -552,27 +552,13 @@ int _yr_scan_match_callback(
   YR_STRING* string = callback_args->string;
   YR_MATCH* new_match;
 
-  int character_size;
   int result = ERROR_SUCCESS;
   int tidx = callback_args->tidx;
 
   size_t match_offset = match_data - callback_args->data;
 
-  if (flags & RE_FLAGS_WIDE)
-    character_size = 2;
-  else
-    character_size = 1;
-
-  // match_length > 0 means that we have found some backward matching
-  // but backward matching overlaps one character with forward matching,
-  // we decrement match_length here to compensate that overlapping.
-
-  if (match_length > 0)
-    match_length -= character_size;
-
   // total match length is the sum of backward and forward matches.
-
-  match_length = match_length + callback_args->forward_matches;
+  match_length += callback_args->forward_matches;
 
   if (callback_args->full_word)
   {
@@ -718,8 +704,8 @@ int _yr_scan_verify_re_match(
   {
     backward_matches = exec(
         ac_match->backward_code,
-        data + offset,
-        offset + 1,
+        data + offset - 1,
+        offset,
         flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE,
         _yr_scan_match_callback,
         (void*) &callback_args);
diff --git a/yara-python/tests.py b/yara-python/tests.py
index fe42605..34c238d 100644
--- a/yara-python/tests.py
+++ b/yara-python/tests.py
@@ -112,6 +112,7 @@ RE_TESTS = [
   ('(a+|b)*', 'ab', SUCCEED, 'ab'),
   ('a|b|c|d|e', 'e', SUCCEED, 'e'),
   ('(a|b|c|d|e)f', 'ef', SUCCEED, 'ef'),
+  ('.b{2}', 'abb', SUCCEED, 'abb'),
   ('ab{1}c', 'abc', SUCCEED, 'abc'),
   ('ab{1,2}c', 'abbc', SUCCEED, 'abbc'),
   ('ab{1,}c', 'abbbc', SUCCEED, 'abbbc'),

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git



More information about the forensics-changes mailing list