[Forensics-changes] [yara] 01/135: Fix issue with ^ anchor in regular expressions used with "matches" operator. Make yr_re_exec easier to read.
Hilko Bengen
bengen at moszumanska.debian.org
Sat Jul 1 10:27:26 UTC 2017
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to annotated tag v3.1.0
in repository yara.
commit 4dc04423311c73243ac0ca54435c0ab911a58289
Author: Victor M. Alvarez <plusvic at gmail.com>
Date: Fri Mar 7 15:24:44 2014 +0100
Fix issue with ^ anchor in regular expressions used with "matches" operator. Make yr_re_exec easier to read.
---
libyara/re.c | 118 +++++++++++++++++++++++++++++++--------------------
yara-python/tests.py | 3 ++
2 files changed, 75 insertions(+), 46 deletions(-)
diff --git a/libyara/re.c b/libyara/re.c
index 2c327d6..5d30375 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -1351,8 +1351,15 @@ int yr_re_exec(
int max_count;
int match;
int character_size;
+ int kill;
+ int action;
int result = -1;
+ #define ACTION_NONE 0
+ #define ACTION_CONTINUE 1
+ #define ACTION_KILL 2
+ #define ACTION_KILL_TAIL 3
+
if (_yr_re_alloc_storage(&storage) != ERROR_SUCCESS)
return -2;
@@ -1380,50 +1387,13 @@ int yr_re_exec(
while(fiber != NULL)
{
ip = fiber->ip;
-
- if (*ip == RE_OPCODE_MATCH ||
- *ip == RE_OPCODE_MATCH_AT_START ||
- *ip == RE_OPCODE_MATCH_AT_END)
- {
- if ((*ip == RE_OPCODE_MATCH_AT_START &&
- input_size - 1 > count - character_size) ||
- (*ip == RE_OPCODE_MATCH_AT_END &&
- input_size > count))
- {
- fiber = _yr_re_fiber_kill(&fibers, &storage->fiber_pool, fiber);
- continue;
- }
-
- result = count;
-
- if (flags & RE_FLAGS_EXHAUSTIVE)
- {
- if (flags & RE_FLAGS_BACKWARDS)
- callback(input + character_size, count, flags, callback_args);
- else
- callback(input_data, count, flags, callback_args);
-
- fiber = _yr_re_fiber_kill(&fibers, &storage->fiber_pool, fiber);
- }
- else
- {
- _yr_re_fiber_kill_tail(&fibers, &storage->fiber_pool, fiber);
- fiber = NULL;
- }
-
- continue;
- }
-
- if (count >= max_count)
- {
- fiber = _yr_re_fiber_kill(&fibers, &storage->fiber_pool, fiber);
- continue;
- }
+ action = ACTION_NONE;
switch(*ip)
{
case RE_OPCODE_ANY:
match = (*input != 0x0A || flags & RE_FLAGS_DOT_ALL);
+ action = match ? ACTION_NONE : ACTION_KILL;
fiber->ip += 1;
break;
@@ -1432,6 +1402,7 @@ int yr_re_exec(
match = lowercase[*input] == lowercase[*(ip + 1)];
else
match = (*input == *(ip + 1));
+ action = match ? ACTION_NONE : ACTION_KILL;
fiber->ip += 2;
break;
@@ -1444,6 +1415,7 @@ int yr_re_exec(
// which can't be case-insensitive.
match = ((*input & mask) == value);
+ action = match ? ACTION_NONE : ACTION_KILL;
fiber->ip += 3;
break;
@@ -1453,52 +1425,106 @@ int yr_re_exec(
CHAR_IN_CLASS(altercase[*input], ip + 1);
else
match = CHAR_IN_CLASS(*input, ip + 1);
+ action = match ? ACTION_NONE : ACTION_KILL;
fiber->ip += 33;
break;
case RE_OPCODE_WORD_CHAR:
match = (isalnum(*input) || *input == '_');
+ action = match ? ACTION_NONE : ACTION_KILL;
fiber->ip += 1;
break;
case RE_OPCODE_NON_WORD_CHAR:
match = (!isalnum(*input) && *input != '_');
+ action = match ? ACTION_NONE : ACTION_KILL;
fiber->ip += 1;
break;
case RE_OPCODE_SPACE:
match = (*input == ' ' || *input == '\t');
+ action = match ? ACTION_NONE : ACTION_KILL;
fiber->ip += 1;
break;
case RE_OPCODE_NON_SPACE:
match = (*input != ' ' && *input != '\t');
+ action = match ? ACTION_NONE : ACTION_KILL;
fiber->ip += 1;
break;
case RE_OPCODE_DIGIT:
match = isdigit(*input);
+ action = match ? ACTION_NONE : ACTION_KILL;
fiber->ip += 1;
break;
case RE_OPCODE_NON_DIGIT:
match = !isdigit(*input);
+ action = match ? ACTION_NONE : ACTION_KILL;
fiber->ip += 1;
break;
+ case RE_OPCODE_MATCH_AT_START:
+ if (flags & RE_FLAGS_BACKWARDS)
+ kill = (input_size - 1 > count - character_size);
+ else
+ kill = (count != 0);
+ action = kill ? ACTION_KILL : ACTION_CONTINUE;
+ break;
+
+ case RE_OPCODE_MATCH_AT_END:
+ assert(!(flags & RE_FLAGS_BACKWARDS));
+ action = input_size > count ? ACTION_KILL : ACTION_CONTINUE;
+ break;
+
+ case RE_OPCODE_MATCH:
+ result = count;
+
+ if (flags & RE_FLAGS_EXHAUSTIVE)
+ {
+ if (flags & RE_FLAGS_BACKWARDS)
+ callback(input + character_size, count, flags, callback_args);
+ else
+ callback(input_data, count, flags, callback_args);
+
+ action = ACTION_KILL;
+ }
+ else
+ {
+ action = ACTION_KILL_TAIL;
+ }
+
+ break;
+
default:
assert(FALSE);
}
- if (!match)
+ if (count >= max_count && action != ACTION_CONTINUE)
+ action = ACTION_KILL;
+
+ switch(action)
{
- fiber = _yr_re_fiber_kill(&fibers, &storage->fiber_pool, fiber);
- continue;
- }
+ case ACTION_KILL:
+ fiber = _yr_re_fiber_kill(&fibers, &storage->fiber_pool, fiber);
+ break;
- next_fiber = fiber->next;
- _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
- fiber = next_fiber;
+ case ACTION_KILL_TAIL:
+ _yr_re_fiber_kill_tail(&fibers, &storage->fiber_pool, fiber);
+ fiber = NULL;
+ break;
+
+ case ACTION_CONTINUE:
+ fiber->ip += 1;
+ _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
+ break;
+
+ default:
+ next_fiber = fiber->next;
+ _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
+ fiber = next_fiber;
+ }
}
if (flags & RE_FLAGS_WIDE && count + 1 < max_count && *(input + 1) != 0)
diff --git a/yara-python/tests.py b/yara-python/tests.py
index 13c4661..da003ff 100644
--- a/yara-python/tests.py
+++ b/yara-python/tests.py
@@ -607,6 +607,9 @@ class TestYara(unittest.TestCase):
r = yara.compile(source='rule test { condition: ext_str matches /^miss/ }', externals={'ext_str': 'mississippi'})
self.assertTrue(r.match(data='dummy'))
+ r = yara.compile(source='rule test { condition: ext_str matches /^iss/ }', externals={'ext_str': 'mississippi'})
+ self.assertFalse(r.match(data='dummy'))
+
r = yara.compile(source='rule test { condition: ext_str matches /ssi$/ }', externals={'ext_str': 'mississippi'})
self.assertFalse(r.match(data='dummy'))
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes mailing list