[Forensics-changes] [yara] 155/192: Fix issue #646 (#648)

Hilko Bengen bengen at moszumanska.debian.org
Sat Jul 1 10:31:59 UTC 2017


This is an automated email from the git hooks/post-receive script.

bengen pushed a commit to annotated tag v3.6.0
in repository yara.

commit 83d799804648c2a0895d40a19835d9b757c6fa4e
Author: Victor M. Alvarez <plusvic at gmail.com>
Date:   Thu Apr 27 11:39:04 2017 +0200

    Fix issue #646 (#648)
    
    * Fix issue #646 and some edge cases with wide regexps using \b and \B
    
    * Rename function IS_WORD_CHAR to _yr_re_is_word_char
---
 libyara/exec.c            |   1 +
 libyara/include/yara/re.h |  15 ++-----
 libyara/re.c              | 110 +++++++++++++++++++++++++++++++++-------------
 libyara/scan.c            |  10 +++--
 tests/test-rules.c        |  44 +++++++++++++++++++
 5 files changed, 135 insertions(+), 45 deletions(-)

diff --git a/libyara/exec.c b/libyara/exec.c
index 14128cf..0b58999 100644
--- a/libyara/exec.c
+++ b/libyara/exec.c
@@ -850,6 +850,7 @@ int yr_execute_code(
           (uint8_t*) r2.re->code,
           (uint8_t*) r1.ss->c_string,
           r1.ss->length,
+          0,
           r2.re->flags | RE_FLAGS_SCAN,
           NULL,
           NULL) >= 0;
diff --git a/libyara/include/yara/re.h b/libyara/include/yara/re.h
index a9645bb..f239306 100644
--- a/libyara/include/yara/re.h
+++ b/libyara/include/yara/re.h
@@ -94,7 +94,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define RE_FLAGS_NO_CASE                0x20
 #define RE_FLAGS_SCAN                   0x40
 #define RE_FLAGS_DOT_ALL                0x80
-#define RE_FLAGS_NOT_AT_START          0x100
 #define RE_FLAGS_GREEDY                0x400
 #define RE_FLAGS_UNGREEDY              0x800
 
@@ -107,14 +106,6 @@ typedef struct RE_ERROR RE_ERROR;
 typedef uint8_t RE_SPLIT_ID_TYPE;
 
 
-#define CHAR_IN_CLASS(chr, cls)  \
-    ((cls)[(chr) / 8] & 1 << ((chr) % 8))
-
-
-#define IS_WORD_CHAR(chr) \
-    (isalnum(chr) || (chr) == '_')
-
-
 struct RE_NODE
 {
   int type;
@@ -213,7 +204,8 @@ void yr_re_node_destroy(
 int yr_re_exec(
     uint8_t* re_code,
     uint8_t* input,
-    size_t input_size,
+    size_t input_forwards_size,
+    size_t input_backwards_size,
     int flags,
     RE_MATCH_CALLBACK_FUNC callback,
     void* callback_args);
@@ -222,7 +214,8 @@ int yr_re_exec(
 int yr_re_fast_exec(
     uint8_t* re_code,
     uint8_t* input,
-    size_t input_size,
+    size_t input_forwards_size,
+    size_t input_backwards_size,
     int flags,
     RE_MATCH_CALLBACK_FUNC callback,
     void* callback_args);
diff --git a/libyara/re.c b/libyara/re.c
index dcb063c..6257a81 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -140,6 +140,24 @@ typedef struct _RE_THREAD_STORAGE
 YR_THREAD_STORAGE_KEY thread_storage_key = 0;
 
 
+#define CHAR_IN_CLASS(chr, cls)  \
+    ((cls)[(chr) / 8] & 1 << ((chr) % 8))
+
+// Non-zero iff *input is alnum or '_' and, for 2-byte chars, input[1] == 0.
+int _yr_re_is_word_char(
+    uint8_t* input,
+    int character_size)
+{
+  int result = ((isalnum(*input) || (*input) == '_'));
+
+  if (character_size == 2)
+    result = result && (*(input + 1) == 0);
+
+  return result;
+}
+
+
+
 //
 // yr_re_initialize
 //
@@ -360,6 +378,7 @@ int yr_re_match(
       re->code,
       (uint8_t*) target,
       strlen(target),
+      0,
       re->flags | RE_FLAGS_SCAN,
       NULL,
       NULL);
@@ -1799,18 +1818,30 @@ int _yr_re_fiber_sync(
 //
 // yr_re_exec
 //
-// Executes a regular expression
+// Executes a regular expression. The specified regular expression will try to
+// match the data starting at the address specified by "input". The "input"
+// pointer can point to any address inside a memory buffer. Arguments
+// "input_forwards_size" and "input_backwards_size" indicate how many bytes
+// can be accessible starting at "input" and going forwards and backwards
+// respectively.
+//
+//   <--- input_backwards_size -->|<----------- input_forwards_size -------->
+//  |--------  memory buffer  -----------------------------------------------|
+//                                ^
+//                              input
 //
 // Args:
 //   uint8_t* re_code                 - Regexp code be executed
 //   uint8_t* input                   - Pointer to input data
-//   size_t input_size                - Input data size
+//   size_t input_forwards_size       - Number of accessible bytes starting at
+//                                      "input" and going forwards.
+//   size_t input_backwards_size      - Number of accessible bytes starting at
+//                                      "input" and going backwards
 //   int flags                        - Flags:
 //      RE_FLAGS_SCAN
 //      RE_FLAGS_BACKWARDS
 //      RE_FLAGS_EXHAUSTIVE
 //      RE_FLAGS_WIDE
-//      RE_FLAGS_NOT_AT_START
 //      RE_FLAGS_NO_CASE
 //      RE_FLAGS_DOT_ALL
 //   RE_MATCH_CALLBACK_FUNC callback  - Callback function
@@ -1825,10 +1856,12 @@ int _yr_re_fiber_sync(
 //      -4  Too many fibers
 //      -5  Unknown fatal error
 
+
 int yr_re_exec(
     uint8_t* re_code,
     uint8_t* input_data,
-    size_t input_size,
+    size_t input_forwards_size,
+    size_t input_backwards_size,
     int flags,
     RE_MATCH_CALLBACK_FUNC callback,
     void* callback_args)
@@ -1858,18 +1891,23 @@ int yr_re_exec(
   #define ACTION_KILL       2
   #define ACTION_KILL_TAIL  3
 
-  #define prolog if (bytes_matched >= max_bytes_matched) \
+  #define prolog { \
+      if ((bytes_matched >= max_bytes_matched) || \
+          (character_size == 2 && *(input + 1) != 0)) \
       { \
         action = ACTION_KILL; \
         break; \
-      }
+      } \
+    }
 
-  #define fail_if_error(e) switch (e) { \
+  #define fail_if_error(e) { \
+      switch (e) { \
         case ERROR_INSUFFICIENT_MEMORY: \
           return -2; \
         case ERROR_TOO_MANY_RE_FIBERS: \
           return -4; \
-      }
+      } \
+    }
 
   if (_yr_re_alloc_storage(&storage) != ERROR_SUCCESS)
     return -2;
@@ -1884,14 +1922,17 @@ int yr_re_exec(
 
   if (flags & RE_FLAGS_BACKWARDS)
   {
+    max_bytes_matched = (int) yr_min(input_backwards_size, RE_SCAN_LIMIT);
     input -= character_size;
     input_incr = -input_incr;
   }
-
-  max_bytes_matched = (int) yr_min(input_size, RE_SCAN_LIMIT);
+  else
+  {
+    max_bytes_matched = (int) yr_min(input_forwards_size, RE_SCAN_LIMIT);
+  }
 
   // Round down max_bytes_matched to a multiple of character_size, this way if
-  // character_size is 2 and input_size is odd we are ignoring the
+  // character_size is 2 and max_bytes_matched is odd we are ignoring the
   // extra byte which can't match anyways.
 
   max_bytes_matched = max_bytes_matched - max_bytes_matched % character_size;
@@ -1973,14 +2014,14 @@ int yr_re_exec(
 
         case RE_OPCODE_WORD_CHAR:
           prolog;
-          match = IS_WORD_CHAR(*input);
+          match = _yr_re_is_word_char(input, character_size);
           action = match ? ACTION_NONE : ACTION_KILL;
           fiber->ip += 1;
           break;
 
         case RE_OPCODE_NON_WORD_CHAR:
           prolog;
-          match = !IS_WORD_CHAR(*input);
+          match = !_yr_re_is_word_char(input, character_size);
           action = match ? ACTION_NONE : ACTION_KILL;
           fiber->ip += 1;
           break;
@@ -2028,16 +2069,25 @@ int yr_re_exec(
         case RE_OPCODE_WORD_BOUNDARY:
         case RE_OPCODE_NON_WORD_BOUNDARY:
 
-          if (bytes_matched == 0 &&
-              !(flags & RE_FLAGS_NOT_AT_START) &&
-              !(flags & RE_FLAGS_BACKWARDS))
+          if (bytes_matched == 0 && input_backwards_size < character_size)
+          {
             match = TRUE;
+          }
           else if (bytes_matched >= max_bytes_matched)
+          {
             match = TRUE;
-          else if (IS_WORD_CHAR(*(input - input_incr)) != IS_WORD_CHAR(*input))
-            match = TRUE;
+          }
           else
-            match = FALSE;
+          {
+            assert(input <  input_data + input_forwards_size);
+            assert(input >= input_data - input_backwards_size);
+
+            assert(input - input_incr <  input_data + input_forwards_size);
+            assert(input - input_incr >= input_data - input_backwards_size);
+
+            match = _yr_re_is_word_char(input, character_size) != \
+                    _yr_re_is_word_char(input - input_incr, character_size);
+          }
 
           if (*ip == RE_OPCODE_NON_WORD_BOUNDARY)
             match = !match;
@@ -2048,16 +2098,16 @@ int yr_re_exec(
 
         case RE_OPCODE_MATCH_AT_START:
           if (flags & RE_FLAGS_BACKWARDS)
-            kill = input_size > (size_t) bytes_matched;
+            kill = input_backwards_size > (size_t) bytes_matched;
           else
-            kill = (flags & RE_FLAGS_NOT_AT_START) || (bytes_matched != 0);
+            kill = input_backwards_size > 0 || (bytes_matched != 0);
           action = kill ? ACTION_KILL : ACTION_CONTINUE;
           fiber->ip += 1;
           break;
 
         case RE_OPCODE_MATCH_AT_END:
           kill = flags & RE_FLAGS_BACKWARDS ||
-                 input_size > (size_t) bytes_matched;
+                 input_forwards_size > (size_t) bytes_matched;
           action = kill ? ACTION_KILL : ACTION_CONTINUE;
           fiber->ip += 1;
           break;
@@ -2134,13 +2184,6 @@ int yr_re_exec(
       }
     }
 
-    if (flags & RE_FLAGS_WIDE &&
-        bytes_matched < max_bytes_matched &&
-        *(input + 1) != 0)
-    {
-      _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool);
-    }
-
     input += input_incr;
     bytes_matched += character_size;
 
@@ -2164,7 +2207,8 @@ int yr_re_exec(
 int yr_re_fast_exec(
     uint8_t* code,
     uint8_t* input_data,
-    size_t input_size,
+    size_t input_forwards_size,
+    size_t input_backwards_size,
     int flags,
     RE_MATCH_CALLBACK_FUNC callback,
     void* callback_args)
@@ -2187,7 +2231,11 @@ int yr_re_fast_exec(
   int input_incr;
   int sp = 0;
   int bytes_matched;
-  int max_bytes_matched = input_size;
+  int max_bytes_matched;
+
+  max_bytes_matched = flags & RE_FLAGS_BACKWARDS ?
+      input_backwards_size :
+      input_forwards_size;
 
   input_incr = flags & RE_FLAGS_BACKWARDS ? -1 : 1;
 
diff --git a/libyara/scan.c b/libyara/scan.c
index 93fcf8d..88a7d84 100644
--- a/libyara/scan.c
+++ b/libyara/scan.c
@@ -528,7 +528,8 @@ int _yr_scan_match_callback(
 typedef int (*RE_EXEC_FUNC)(
     uint8_t* code,
     uint8_t* input,
-    size_t input_size,
+    size_t input_forwards_size,
+    size_t input_backwards_size,
     int flags,
     RE_MATCH_CALLBACK_FUNC callback,
     void* callback_args);
@@ -569,7 +570,8 @@ int _yr_scan_verify_re_match(
         ac_match->forward_code,
         data + offset,
         data_size - offset,
-        offset > 0 ? flags | RE_FLAGS_NOT_AT_START : flags,
+        offset,
+        flags,
         NULL,
         NULL);
   }
@@ -581,7 +583,8 @@ int _yr_scan_verify_re_match(
         ac_match->forward_code,
         data + offset,
         data_size - offset,
-        offset > 0 ? flags | RE_FLAGS_NOT_AT_START : flags,
+        offset,
+        flags,
         NULL,
         NULL);
   }
@@ -616,6 +619,7 @@ int _yr_scan_verify_re_match(
     backward_matches = exec(
         ac_match->backward_code,
         data + offset,
+        data_size - offset,
         offset,
         flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE,
         _yr_scan_match_callback,
diff --git a/tests/test-rules.c b/tests/test-rules.c
index c26a25f..63433e0 100644
--- a/tests/test-rules.c
+++ b/tests/test-rules.c
@@ -901,6 +901,50 @@ void test_re()
       "rule test { strings: $a = /a.{1,2}b/ wide condition: !a == 8 }",
       "a\0x\0x\0b\0");
 
+  assert_true_rule_blob(
+      "rule test { strings: $a = /\\babc/ wide condition: $a }",
+      "a\0b\0c\0");
+
+  assert_true_rule_blob(
+      "rule test { strings: $a = /\\babc/ wide condition: $a }",
+      "\0a\0b\0c\0");
+
+  assert_true_rule_blob(
+      "rule test { strings: $a = /\\babc/ wide condition: $a }",
+      "\ta\0b\0c\0");
+
+  assert_false_rule_blob(
+      "rule test { strings: $a = /\\babc/ wide condition: $a }",
+      "x\0a\0b\0c\0");
+
+  assert_true_rule_blob(
+      "rule test { strings: $a = /\\babc/ wide condition: $a }",
+      "x\ta\0b\0c\0");
+
+  assert_true_rule_blob(
+      "rule test { strings: $a = /abc\\b/ wide condition: $a }",
+      "a\0b\0c\0");
+
+  assert_true_rule_blob(
+      "rule test { strings: $a = /abc\\b/ wide condition: $a }",
+      "a\0b\0c\0\0");
+
+  assert_true_rule_blob(
+      "rule test { strings: $a = /abc\\b/ wide condition: $a }",
+      "a\0b\0c\0\t");
+
+  assert_false_rule_blob(
+      "rule test { strings: $a = /abc\\b/ wide condition: $a }",
+      "a\0b\0c\0x\0");
+
+  assert_true_rule_blob(
+      "rule test { strings: $a = /abc\\b/ wide condition: $a }",
+      "a\0b\0c\0b\t");
+
+  assert_false_rule_blob(
+      "rule test { strings: $a = /\\b/ wide condition: $a }",
+      "abc");
+
   assert_regexp_syntax_error(")");
   assert_true_regexp("abc", "abc", "abc");
   assert_false_regexp("abc", "xbc");

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git



More information about the forensics-changes mailing list