[Forensics-changes] [yara] 12/135: Fix bug in regular expression engine causing false negative matches
Hilko Bengen
bengen at moszumanska.debian.org
Sat Jul 1 10:27:27 UTC 2017
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to annotated tag v3.1.0
in repository yara.
commit 08c8456a0728522b347e936f66b98676a890cd09
Author: Victor M. Alvarez <plusvic at gmail.com>
Date: Fri Mar 28 16:44:49 2014 +0100
Fix bug in regular expression engine causing false negative matches
Affected regular expressions were those where the chosen atom had a {n} or {n,m} quantifier with n > 1. For example, /.b{2}/ didn't match "abb".
---
libyara/atoms.c | 5 +++--
libyara/re.c | 32 ++++++++++++++++++++-----------
libyara/rules.c | 54 +++++++++++++++++++---------------------------------
yara-python/tests.py | 1 +
4 files changed, 45 insertions(+), 47 deletions(-)
diff --git a/libyara/atoms.c b/libyara/atoms.c
index f78b580..cb2bb97 100644
--- a/libyara/atoms.c
+++ b/libyara/atoms.c
@@ -728,13 +728,14 @@ ATOM_TREE_NODE* _yr_atoms_extract_from_re_node(
append_current_leaf_to_node(current_node);
- if (re_node->start > 0)
+ for (i = 0; i < re_node->start; i++)
{
current_node = _yr_atoms_extract_from_re_node(
re_node->left, atom_tree, current_node);
+ }
+ if (re_node->start > 0)
append_current_leaf_to_node(current_node);
- }
return current_node;
diff --git a/libyara/re.c b/libyara/re.c
index cb142bf..cd8b282 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -45,8 +45,10 @@ order to avoid confusion with operating system threads.
#define RE_MAX_STACK 1024
#define RE_SCAN_LIMIT 4096
-#define EMIT_FLAGS_BACKWARDS 1
-#define EMIT_FLAGS_DONT_ANNOTATE_RE 2
+#define EMIT_BACKWARDS 1
+#define DONT_UPDATE_FORWARDS_CODE 2
+#define DONT_UPDATE_BACKWARDS_CODE 4
+
#ifndef min
#define min(x, y) ((x < y) ? (x) : (y))
@@ -653,7 +655,7 @@ int _yr_re_emit(
case RE_NODE_CONCAT:
- if (flags & EMIT_FLAGS_BACKWARDS)
+ if (flags & EMIT_BACKWARDS)
{
left = re_node->right;
right = re_node->left;
@@ -842,10 +844,15 @@ int _yr_re_emit(
for (i = 0; i < re_node->start - 1; i++)
{
+ // We don't want re_node->forward_code updated in this call:
+ // forward_code must remain pointing to the code generated
+ // by the _yr_re_emit call above. However, we do want
+ // re_node->backward_code to be updated.
+
FAIL_ON_ERROR(_yr_re_emit(
re_node->left,
arena,
- flags | EMIT_FLAGS_DONT_ANNOTATE_RE,
+ flags | DONT_UPDATE_FORWARDS_CODE,
NULL,
&branch_size));
@@ -880,7 +887,7 @@ int _yr_re_emit(
FAIL_ON_ERROR(_yr_re_emit(
re_node->left,
arena,
- flags | EMIT_FLAGS_DONT_ANNOTATE_RE,
+ flags | DONT_UPDATE_FORWARDS_CODE | DONT_UPDATE_BACKWARDS_CODE,
NULL,
&branch_size));
@@ -907,11 +914,14 @@ int _yr_re_emit(
break;
}
- if (!(flags & EMIT_FLAGS_DONT_ANNOTATE_RE))
+ if (flags & EMIT_BACKWARDS)
{
- if (flags & EMIT_FLAGS_BACKWARDS)
- re_node->backward_code = instruction_addr;
- else
+ if (!(flags & DONT_UPDATE_BACKWARDS_CODE))
+ re_node->backward_code = instruction_addr + *code_size;
+ }
+ else
+ {
+ if (!(flags & DONT_UPDATE_FORWARDS_CODE))
re_node->forward_code = instruction_addr;
}
@@ -946,7 +956,7 @@ int yr_re_emit_code(
FAIL_ON_ERROR(_yr_re_emit(
re->root_node,
arena,
- EMIT_FLAGS_BACKWARDS,
+ EMIT_BACKWARDS,
NULL,
&code_size));
@@ -1483,7 +1493,7 @@ int yr_re_exec(
case RE_OPCODE_MATCH_AT_START:
if (flags & RE_FLAGS_BACKWARDS)
- kill = (input_size - 1 > count - character_size);
+ kill = (input_size > count);
else
kill = (count != 0);
action = kill ? ACTION_KILL : ACTION_CONTINUE;
diff --git a/libyara/rules.c b/libyara/rules.c
index 3e850cb..67bd717 100644
--- a/libyara/rules.c
+++ b/libyara/rules.c
@@ -199,6 +199,23 @@ int _yr_scan_fast_hex_re_exec(
while(!stop)
{
+ if (*ip == RE_OPCODE_MATCH)
+ {
+ if (flags & RE_FLAGS_EXHAUSTIVE)
+ {
+ callback(
+ flags & RE_FLAGS_BACKWARDS ? current_input + 1 : input,
+ matches,
+ flags,
+ callback_args);
+ break;
+ }
+ else
+ {
+ return matches;
+ }
+ }
+
if (flags & RE_FLAGS_BACKWARDS)
{
if (current_input <= input - input_size)
@@ -283,23 +300,6 @@ int _yr_scan_fast_hex_re_exec(
default:
assert(FALSE);
}
-
- if (*ip == RE_OPCODE_MATCH)
- {
- if (flags & RE_FLAGS_EXHAUSTIVE)
- {
- callback(
- flags & RE_FLAGS_BACKWARDS ? current_input + 1 : input,
- matches,
- flags,
- callback_args);
- stop = TRUE;
- }
- else
- {
- return matches;
- }
- }
}
}
@@ -552,27 +552,13 @@ int _yr_scan_match_callback(
YR_STRING* string = callback_args->string;
YR_MATCH* new_match;
- int character_size;
int result = ERROR_SUCCESS;
int tidx = callback_args->tidx;
size_t match_offset = match_data - callback_args->data;
- if (flags & RE_FLAGS_WIDE)
- character_size = 2;
- else
- character_size = 1;
-
- // match_length > 0 means that we have found some backward matching
- // but backward matching overlaps one character with forward matching,
- // we decrement match_length here to compensate that overlapping.
-
- if (match_length > 0)
- match_length -= character_size;
-
// total match length is the sum of backward and forward matches.
-
- match_length = match_length + callback_args->forward_matches;
+ match_length += callback_args->forward_matches;
if (callback_args->full_word)
{
@@ -718,8 +704,8 @@ int _yr_scan_verify_re_match(
{
backward_matches = exec(
ac_match->backward_code,
- data + offset,
- offset + 1,
+ data + offset - 1,
+ offset,
flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE,
_yr_scan_match_callback,
(void*) &callback_args);
diff --git a/yara-python/tests.py b/yara-python/tests.py
index fe42605..34c238d 100644
--- a/yara-python/tests.py
+++ b/yara-python/tests.py
@@ -112,6 +112,7 @@ RE_TESTS = [
('(a+|b)*', 'ab', SUCCEED, 'ab'),
('a|b|c|d|e', 'e', SUCCEED, 'e'),
('(a|b|c|d|e)f', 'ef', SUCCEED, 'ef'),
+ ('.b{2}', 'abb', SUCCEED, 'abb'),
('ab{1}c', 'abc', SUCCEED, 'abc'),
('ab{1,2}c', 'abbc', SUCCEED, 'abbc'),
('ab{1,}c', 'abbbc', SUCCEED, 'abbbc'),
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes
mailing list