[Forensics-changes] [yara] 155/192: Fix issue #646 (#648)
Hilko Bengen
bengen at moszumanska.debian.org
Sat Jul 1 10:31:59 UTC 2017
This is an automated email from the git hooks/post-receive script.
bengen pushed a commit to annotated tag v3.6.0
in repository yara.
commit 83d799804648c2a0895d40a19835d9b757c6fa4e
Author: Victor M. Alvarez <plusvic at gmail.com>
Date: Thu Apr 27 11:39:04 2017 +0200
Fix issue #646 (#648)
* Fix issue #646 and some edge cases with wide regexps using \b and \B
* Rename function IS_WORD_CHAR to _yr_re_is_word_char
---
libyara/exec.c | 1 +
libyara/include/yara/re.h | 15 ++-----
libyara/re.c | 110 +++++++++++++++++++++++++++++++++-------------
libyara/scan.c | 10 +++--
tests/test-rules.c | 44 +++++++++++++++++++
5 files changed, 135 insertions(+), 45 deletions(-)
diff --git a/libyara/exec.c b/libyara/exec.c
index 14128cf..0b58999 100644
--- a/libyara/exec.c
+++ b/libyara/exec.c
@@ -850,6 +850,7 @@ int yr_execute_code(
(uint8_t*) r2.re->code,
(uint8_t*) r1.ss->c_string,
r1.ss->length,
+ 0,
r2.re->flags | RE_FLAGS_SCAN,
NULL,
NULL) >= 0;
diff --git a/libyara/include/yara/re.h b/libyara/include/yara/re.h
index a9645bb..f239306 100644
--- a/libyara/include/yara/re.h
+++ b/libyara/include/yara/re.h
@@ -94,7 +94,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RE_FLAGS_NO_CASE 0x20
#define RE_FLAGS_SCAN 0x40
#define RE_FLAGS_DOT_ALL 0x80
-#define RE_FLAGS_NOT_AT_START 0x100
#define RE_FLAGS_GREEDY 0x400
#define RE_FLAGS_UNGREEDY 0x800
@@ -107,14 +106,6 @@ typedef struct RE_ERROR RE_ERROR;
typedef uint8_t RE_SPLIT_ID_TYPE;
-#define CHAR_IN_CLASS(chr, cls) \
- ((cls)[(chr) / 8] & 1 << ((chr) % 8))
-
-
-#define IS_WORD_CHAR(chr) \
- (isalnum(chr) || (chr) == '_')
-
-
struct RE_NODE
{
int type;
@@ -213,7 +204,8 @@ void yr_re_node_destroy(
int yr_re_exec(
uint8_t* re_code,
uint8_t* input,
- size_t input_size,
+ size_t input_forwards_size,
+ size_t input_backwards_size,
int flags,
RE_MATCH_CALLBACK_FUNC callback,
void* callback_args);
@@ -222,7 +214,8 @@ int yr_re_exec(
int yr_re_fast_exec(
uint8_t* re_code,
uint8_t* input,
- size_t input_size,
+ size_t input_forwards_size,
+ size_t input_backwards_size,
int flags,
RE_MATCH_CALLBACK_FUNC callback,
void* callback_args);
diff --git a/libyara/re.c b/libyara/re.c
index dcb063c..6257a81 100644
--- a/libyara/re.c
+++ b/libyara/re.c
@@ -140,6 +140,24 @@ typedef struct _RE_THREAD_STORAGE
YR_THREAD_STORAGE_KEY thread_storage_key = 0;
+#define CHAR_IN_CLASS(chr, cls) \
+ ((cls)[(chr) / 8] & 1 << ((chr) % 8))
+
+
+int _yr_re_is_word_char(
+ uint8_t* input,
+ int character_size)
+{
+ int result = ((isalnum(*input) || (*input) == '_'));
+
+ if (character_size == 2)
+ result = result && (*(input + 1) == 0);
+
+ return result;
+}
+
+
+
//
// yr_re_initialize
//
@@ -360,6 +378,7 @@ int yr_re_match(
re->code,
(uint8_t*) target,
strlen(target),
+ 0,
re->flags | RE_FLAGS_SCAN,
NULL,
NULL);
@@ -1799,18 +1818,30 @@ int _yr_re_fiber_sync(
//
// yr_re_exec
//
-// Executes a regular expression
+// Executes a regular expression. The specified regular expression will try to
+// match the data starting at the address specified by "input". The "input"
+// pointer can point to any address inside a memory buffer. Arguments
+// "input_forwards_size" and "input_backwards_size" indicate how many bytes
+// are accessible starting at "input" and going forwards and backwards
+// respectively.
+//
+// <--- input_backwards_size -->|<----------- input_forwards_size -------->
+// |-------- memory buffer -----------------------------------------------|
+// ^
+// input
//
// Args:
// uint8_t* re_code - Regexp code be executed
// uint8_t* input - Pointer to input data
-// size_t input_size - Input data size
+// size_t input_forwards_size - Number of accessible bytes starting at
+// "input" and going forwards.
+// size_t input_backwards_size - Number of accessible bytes starting at
+// "input" and going backwards
// int flags - Flags:
// RE_FLAGS_SCAN
// RE_FLAGS_BACKWARDS
// RE_FLAGS_EXHAUSTIVE
// RE_FLAGS_WIDE
-// RE_FLAGS_NOT_AT_START
// RE_FLAGS_NO_CASE
// RE_FLAGS_DOT_ALL
// RE_MATCH_CALLBACK_FUNC callback - Callback function
@@ -1825,10 +1856,12 @@ int _yr_re_fiber_sync(
// -4 Too many fibers
// -5 Unknown fatal error
+
int yr_re_exec(
uint8_t* re_code,
uint8_t* input_data,
- size_t input_size,
+ size_t input_forwards_size,
+ size_t input_backwards_size,
int flags,
RE_MATCH_CALLBACK_FUNC callback,
void* callback_args)
@@ -1858,18 +1891,23 @@ int yr_re_exec(
#define ACTION_KILL 2
#define ACTION_KILL_TAIL 3
- #define prolog if (bytes_matched >= max_bytes_matched) \
+ #define prolog { \
+ if ((bytes_matched >= max_bytes_matched) || \
+ (character_size == 2 && *(input + 1) != 0)) \
{ \
action = ACTION_KILL; \
break; \
- }
+ } \
+ }
- #define fail_if_error(e) switch (e) { \
+ #define fail_if_error(e) { \
+ switch (e) { \
case ERROR_INSUFFICIENT_MEMORY: \
return -2; \
case ERROR_TOO_MANY_RE_FIBERS: \
return -4; \
- }
+ } \
+ }
if (_yr_re_alloc_storage(&storage) != ERROR_SUCCESS)
return -2;
@@ -1884,14 +1922,17 @@ int yr_re_exec(
if (flags & RE_FLAGS_BACKWARDS)
{
+ max_bytes_matched = (int) yr_min(input_backwards_size, RE_SCAN_LIMIT);
input -= character_size;
input_incr = -input_incr;
}
-
- max_bytes_matched = (int) yr_min(input_size, RE_SCAN_LIMIT);
+ else
+ {
+ max_bytes_matched = (int) yr_min(input_forwards_size, RE_SCAN_LIMIT);
+ }
// Round down max_bytes_matched to a multiple of character_size, this way if
- // character_size is 2 and input_size is odd we are ignoring the
+ // character_size is 2 and max_bytes_matched is odd we are ignoring the
// extra byte which can't match anyways.
max_bytes_matched = max_bytes_matched - max_bytes_matched % character_size;
@@ -1973,14 +2014,14 @@ int yr_re_exec(
case RE_OPCODE_WORD_CHAR:
prolog;
- match = IS_WORD_CHAR(*input);
+ match = _yr_re_is_word_char(input, character_size);
action = match ? ACTION_NONE : ACTION_KILL;
fiber->ip += 1;
break;
case RE_OPCODE_NON_WORD_CHAR:
prolog;
- match = !IS_WORD_CHAR(*input);
+ match = !_yr_re_is_word_char(input, character_size);
action = match ? ACTION_NONE : ACTION_KILL;
fiber->ip += 1;
break;
@@ -2028,16 +2069,25 @@ int yr_re_exec(
case RE_OPCODE_WORD_BOUNDARY:
case RE_OPCODE_NON_WORD_BOUNDARY:
- if (bytes_matched == 0 &&
- !(flags & RE_FLAGS_NOT_AT_START) &&
- !(flags & RE_FLAGS_BACKWARDS))
+ if (bytes_matched == 0 && input_backwards_size < character_size)
+ {
match = TRUE;
+ }
else if (bytes_matched >= max_bytes_matched)
+ {
match = TRUE;
- else if (IS_WORD_CHAR(*(input - input_incr)) != IS_WORD_CHAR(*input))
- match = TRUE;
+ }
else
- match = FALSE;
+ {
+ assert(input < input_data + input_forwards_size);
+ assert(input >= input_data - input_backwards_size);
+
+ assert(input - input_incr < input_data + input_forwards_size);
+ assert(input - input_incr >= input_data - input_backwards_size);
+
+ match = _yr_re_is_word_char(input, character_size) != \
+ _yr_re_is_word_char(input - input_incr, character_size);
+ }
if (*ip == RE_OPCODE_NON_WORD_BOUNDARY)
match = !match;
@@ -2048,16 +2098,16 @@ int yr_re_exec(
case RE_OPCODE_MATCH_AT_START:
if (flags & RE_FLAGS_BACKWARDS)
- kill = input_size > (size_t) bytes_matched;
+ kill = input_backwards_size > (size_t) bytes_matched;
else
- kill = (flags & RE_FLAGS_NOT_AT_START) || (bytes_matched != 0);
+ kill = input_backwards_size > 0 || (bytes_matched != 0);
action = kill ? ACTION_KILL : ACTION_CONTINUE;
fiber->ip += 1;
break;
case RE_OPCODE_MATCH_AT_END:
kill = flags & RE_FLAGS_BACKWARDS ||
- input_size > (size_t) bytes_matched;
+ input_forwards_size > (size_t) bytes_matched;
action = kill ? ACTION_KILL : ACTION_CONTINUE;
fiber->ip += 1;
break;
@@ -2134,13 +2184,6 @@ int yr_re_exec(
}
}
- if (flags & RE_FLAGS_WIDE &&
- bytes_matched < max_bytes_matched &&
- *(input + 1) != 0)
- {
- _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool);
- }
-
input += input_incr;
bytes_matched += character_size;
@@ -2164,7 +2207,8 @@ int yr_re_exec(
int yr_re_fast_exec(
uint8_t* code,
uint8_t* input_data,
- size_t input_size,
+ size_t input_forwards_size,
+ size_t input_backwards_size,
int flags,
RE_MATCH_CALLBACK_FUNC callback,
void* callback_args)
@@ -2187,7 +2231,11 @@ int yr_re_fast_exec(
int input_incr;
int sp = 0;
int bytes_matched;
- int max_bytes_matched = input_size;
+ int max_bytes_matched;
+
+ max_bytes_matched = flags & RE_FLAGS_BACKWARDS ?
+ input_backwards_size :
+ input_forwards_size;
input_incr = flags & RE_FLAGS_BACKWARDS ? -1 : 1;
diff --git a/libyara/scan.c b/libyara/scan.c
index 93fcf8d..88a7d84 100644
--- a/libyara/scan.c
+++ b/libyara/scan.c
@@ -528,7 +528,8 @@ int _yr_scan_match_callback(
typedef int (*RE_EXEC_FUNC)(
uint8_t* code,
uint8_t* input,
- size_t input_size,
+ size_t input_forwards_size,
+ size_t input_backwards_size,
int flags,
RE_MATCH_CALLBACK_FUNC callback,
void* callback_args);
@@ -569,7 +570,8 @@ int _yr_scan_verify_re_match(
ac_match->forward_code,
data + offset,
data_size - offset,
- offset > 0 ? flags | RE_FLAGS_NOT_AT_START : flags,
+ offset,
+ flags,
NULL,
NULL);
}
@@ -581,7 +583,8 @@ int _yr_scan_verify_re_match(
ac_match->forward_code,
data + offset,
data_size - offset,
- offset > 0 ? flags | RE_FLAGS_NOT_AT_START : flags,
+ offset,
+ flags,
NULL,
NULL);
}
@@ -616,6 +619,7 @@ int _yr_scan_verify_re_match(
backward_matches = exec(
ac_match->backward_code,
data + offset,
+ data_size - offset,
offset,
flags | RE_FLAGS_BACKWARDS | RE_FLAGS_EXHAUSTIVE,
_yr_scan_match_callback,
diff --git a/tests/test-rules.c b/tests/test-rules.c
index c26a25f..63433e0 100644
--- a/tests/test-rules.c
+++ b/tests/test-rules.c
@@ -901,6 +901,50 @@ void test_re()
"rule test { strings: $a = /a.{1,2}b/ wide condition: !a == 8 }",
"a\0x\0x\0b\0");
+ assert_true_rule_blob(
+ "rule test { strings: $a = /\\babc/ wide condition: $a }",
+ "a\0b\0c\0");
+
+ assert_true_rule_blob(
+ "rule test { strings: $a = /\\babc/ wide condition: $a }",
+ "\0a\0b\0c\0");
+
+ assert_true_rule_blob(
+ "rule test { strings: $a = /\\babc/ wide condition: $a }",
+ "\ta\0b\0c\0");
+
+ assert_false_rule_blob(
+ "rule test { strings: $a = /\\babc/ wide condition: $a }",
+ "x\0a\0b\0c\0");
+
+ assert_true_rule_blob(
+ "rule test { strings: $a = /\\babc/ wide condition: $a }",
+ "x\ta\0b\0c\0");
+
+ assert_true_rule_blob(
+ "rule test { strings: $a = /abc\\b/ wide condition: $a }",
+ "a\0b\0c\0");
+
+ assert_true_rule_blob(
+ "rule test { strings: $a = /abc\\b/ wide condition: $a }",
+ "a\0b\0c\0\0");
+
+ assert_true_rule_blob(
+ "rule test { strings: $a = /abc\\b/ wide condition: $a }",
+ "a\0b\0c\0\t");
+
+ assert_false_rule_blob(
+ "rule test { strings: $a = /abc\\b/ wide condition: $a }",
+ "a\0b\0c\0x\0");
+
+ assert_true_rule_blob(
+ "rule test { strings: $a = /abc\\b/ wide condition: $a }",
+ "a\0b\0c\0b\t");
+
+ assert_false_rule_blob(
+ "rule test { strings: $a = /\\b/ wide condition: $a }",
+ "abc");
+
assert_regexp_syntax_error(")");
assert_true_regexp("abc", "abc", "abc");
assert_false_regexp("abc", "xbc");
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/forensics/yara.git
More information about the forensics-changes
mailing list